Repository: google/benchmark Branch: main Commit: d4393d5445b4 Files: 203 Total size: 983.8 KB Directory structure: gitextract_rtytoi0j/ ├── .bazelversion ├── .clang-format ├── .clang-tidy ├── .clang-tidy.ignore ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── dependabot.yml │ ├── install_bazel.sh │ ├── libcxx-setup.sh │ └── workflows/ │ ├── bazel.yml │ ├── build-and-test-min-cmake.yml │ ├── build-and-test-perfcounters.yml │ ├── build-and-test.yml │ ├── clang-format-lint.yml │ ├── clang-tidy-lint.yml │ ├── doxygen.yml │ ├── ossf.yml │ ├── pre-commit.yml │ ├── sanitizer.yml │ ├── test_bindings.yml │ └── wheels.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .ycm_extra_conf.py ├── AUTHORS ├── BUILD.bazel ├── CMakeLists.txt ├── CONTRIBUTING.md ├── CONTRIBUTORS ├── LICENSE ├── MODULE.bazel ├── README.md ├── WORKSPACE ├── WORKSPACE.bzlmod ├── _config.yml ├── appveyor.yml ├── bazel/ │ └── benchmark_deps.bzl ├── bindings/ │ └── python/ │ └── google_benchmark/ │ ├── BUILD │ ├── __init__.py │ ├── benchmark.cc │ └── example.py ├── cmake/ │ ├── AddCXXCompilerFlag.cmake │ ├── CXXFeatureCheck.cmake │ ├── Config.cmake.in │ ├── GetGitVersion.cmake │ ├── GoogleTest.cmake │ ├── GoogleTest.cmake.in │ ├── benchmark.pc.in │ ├── benchmark_main.pc.in │ ├── gnu_posix_regex.cpp │ ├── llvm-toolchain.cmake │ ├── posix_regex.cpp │ ├── pthread_affinity.cpp │ ├── split_list.cmake │ ├── std_regex.cpp │ ├── steady_clock.cpp │ └── thread_safety_attributes.cpp ├── docs/ │ ├── AssemblyTests.md │ ├── _config.yml │ ├── assets/ │ │ └── images/ │ │ ├── icon.xcf │ │ └── icon_black.xcf │ ├── dependencies.md │ ├── index.md │ ├── perf_counters.md │ ├── platform_specific_build_instructions.md │ ├── python_bindings.md │ ├── random_interleaving.md │ ├── reducing_variance.md │ ├── releasing.md │ ├── tools.md │ └── user_guide.md ├── include/ │ └── benchmark/ │ ├── benchmark.h │ ├── benchmark_api.h │ ├── counter.h │ ├── export.h │ ├── macros.h │ ├── managers.h │ ├── 
registration.h │ ├── reporter.h │ ├── state.h │ ├── statistics.h │ ├── sysinfo.h │ ├── types.h │ └── utils.h ├── pyproject.toml ├── setup.py ├── src/ │ ├── CMakeLists.txt │ ├── arraysize.h │ ├── benchmark.cc │ ├── benchmark_api_internal.cc │ ├── benchmark_api_internal.h │ ├── benchmark_main.cc │ ├── benchmark_name.cc │ ├── benchmark_register.cc │ ├── benchmark_register.h │ ├── benchmark_runner.cc │ ├── benchmark_runner.h │ ├── check.cc │ ├── check.h │ ├── colorprint.cc │ ├── colorprint.h │ ├── commandlineflags.cc │ ├── commandlineflags.h │ ├── complexity.cc │ ├── complexity.h │ ├── console_reporter.cc │ ├── counter.cc │ ├── counter.h │ ├── csv_reporter.cc │ ├── cycleclock.h │ ├── internal_macros.h │ ├── json_reporter.cc │ ├── log.h │ ├── mutex.h │ ├── perf_counters.cc │ ├── perf_counters.h │ ├── re.h │ ├── reporter.cc │ ├── statistics.cc │ ├── statistics.h │ ├── string_util.cc │ ├── string_util.h │ ├── sysinfo.cc │ ├── thread_manager.h │ ├── thread_timer.h │ ├── timers.cc │ └── timers.h ├── test/ │ ├── AssemblyTests.cmake │ ├── BUILD │ ├── CMakeLists.txt │ ├── args_product_test.cc │ ├── basic_test.cc │ ├── benchmark_gtest.cc │ ├── benchmark_min_time_flag_iters_test.cc │ ├── benchmark_min_time_flag_time_test.cc │ ├── benchmark_name_gtest.cc │ ├── benchmark_random_interleaving_gtest.cc │ ├── benchmark_setup_teardown_cb_types_gtest.cc │ ├── benchmark_setup_teardown_test.cc │ ├── benchmark_test.cc │ ├── clobber_memory_assembly_test.cc │ ├── commandlineflags_gtest.cc │ ├── complexity_test.cc │ ├── cxx11_test.cc │ ├── diagnostics_test.cc │ ├── display_aggregates_only_test.cc │ ├── donotoptimize_assembly_test.cc │ ├── donotoptimize_test.cc │ ├── filter_test.cc │ ├── fixture_test.cc │ ├── internal_threading_test.cc │ ├── link_main_test.cc │ ├── locale_impermeability_test.cc │ ├── manual_threading_test.cc │ ├── map_test.cc │ ├── memory_manager_test.cc │ ├── memory_results_gtest.cc │ ├── min_time_parse_gtest.cc │ ├── multiple_ranges_test.cc │ ├── options_test.cc │ ├── 
output_test.h │ ├── output_test_helper.cc │ ├── overload_test.cc │ ├── perf_counters_gtest.cc │ ├── perf_counters_test.cc │ ├── profiler_manager_gtest.cc │ ├── profiler_manager_iterations_test.cc │ ├── profiler_manager_test.cc │ ├── register_benchmark_test.cc │ ├── repetitions_test.cc │ ├── report_aggregates_only_test.cc │ ├── reporter_output_test.cc │ ├── skip_with_error_test.cc │ ├── spec_arg_test.cc │ ├── spec_arg_verbosity_test.cc │ ├── state_assembly_test.cc │ ├── statistics_gtest.cc │ ├── string_util_gtest.cc │ ├── templated_fixture_method_test.cc │ ├── templated_fixture_test.cc │ ├── time_unit_gtest.cc │ ├── user_counters_tabular_test.cc │ ├── user_counters_test.cc │ ├── user_counters_thousands_test.cc │ └── user_counters_threads_test.cc └── tools/ ├── BUILD.bazel ├── compare.py ├── gbench/ │ ├── Inputs/ │ │ ├── test1_run1.json │ │ ├── test1_run2.json │ │ ├── test2_run.json │ │ ├── test3_run0.json │ │ ├── test3_run1.json │ │ ├── test4_run.json │ │ ├── test4_run0.json │ │ ├── test4_run1.json │ │ ├── test5_run0.json │ │ └── test5_run1.json │ ├── __init__.py │ ├── report.py │ └── util.py ├── libpfm.BUILD.bazel ├── requirements.txt └── strip_asm.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bazelversion ================================================ 8.2.1 ================================================ FILE: .clang-format ================================================ --- Language: Cpp BasedOnStyle: Google PointerAlignment: Left ... 
================================================ FILE: .clang-tidy ================================================ --- Checks: > abseil-*, bugprone-*, clang-analyzer-*, cppcoreguidelines-*, google-*, misc-*, performance-*, readability-*, -clang-analyzer-deadcode*, -clang-analyzer-optin*, -readability-identifier-length WarningsAsErrors: '' HeaderFilterRegex: '' FormatStyle: none CheckOptions: llvm-else-after-return.WarnOnConditionVariables: 'false' modernize-loop-convert.MinConfidence: reasonable modernize-replace-auto-ptr.IncludeStyle: llvm cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false' google-readability-namespace-comments.ShortNamespaceLines: '10' cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::unget
c;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;' cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false' cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU' google-readability-braces-around-statements.ShortStatementLines: '1' cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true' google-readability-namespace-comments.SpacesBeforeComments: '2' modernize-loop-convert.MaxCopySize: '16' modernize-pass-by-value.IncludeStyle: llvm modernize-use-nullptr.NullMacros: 'NULL' llvm-qualified-auto.AddConstToQualified: 'false' modernize-loop-convert.NamingStyle: CamelCase llvm-else-after-return.WarnOnUnfixable: 'false' google-readability-function-size.StatementThreshold: '800' ... ================================================ FILE: .clang-tidy.ignore ================================================ .*third_party/.* ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: "[BUG]" labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **System** Which OS, compiler, and compiler version are you using: - OS: - Compiler and version: **To reproduce** Steps to reproduce the behavior: 1. sync to commit ... 2. cmake/bazel... 3. make ... 4. 
See error **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: "[FR]" labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: daily - package-ecosystem: pip directory: /tools schedule: interval: daily ================================================ FILE: .github/install_bazel.sh ================================================ if ! bazel version; then arch=$(uname -m) if [ "$arch" == "aarch64" ]; then arch="arm64" fi echo "Downloading $arch Bazel binary from GitHub releases." 
curl -L -o $HOME/bin/bazel --create-dirs "https://github.com/bazelbuild/bazel/releases/download/8.2.0/bazel-8.2.0-linux-$arch" chmod +x $HOME/bin/bazel else # Bazel is installed for the correct architecture exit 0 fi ================================================ FILE: .github/libcxx-setup.sh ================================================ #!/usr/bin/env bash set -e # Checkout LLVM sources git clone --filter=blob:none --depth=1 --branch llvmorg-19.1.6 --no-checkout https://github.com/llvm/llvm-project.git llvm-project cd llvm-project git sparse-checkout set --cone git checkout llvmorg-19.1.6 git sparse-checkout set cmake llvm/cmake runtimes libcxx libcxxabi cd .. ## Setup libc++ options if [ -z "$BUILD_32_BITS" ]; then export BUILD_32_BITS=OFF && echo disabling 32 bit build fi ## Build and install libc++. NOTE(review): this step was described as using the unstable ABI for better sanitizer coverage, but -DLIBCXX_ABI_UNSTABLE=OFF is passed below; confirm which is intended. mkdir llvm-build && cd llvm-build cmake -GNinja \ -DCMAKE_C_COMPILER=${CC} \ -DCMAKE_CXX_COMPILER=${CXX} \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_INSTALL_PREFIX=/usr \ -DLIBCXX_ABI_UNSTABLE=OFF \ -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \ -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \ -DLIBCXXABI_USE_LLVM_UNWINDER=OFF \ -DLLVM_INCLUDE_TESTS=OFF \ -DLIBCXX_INCLUDE_TESTS=OFF \ -DLIBCXX_INCLUDE_BENCHMARKS=OFF \ -DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi' \ ../llvm-project/runtimes/ cmake --build . -- cxx cxxabi cd ..
================================================ FILE: .github/workflows/bazel.yml ================================================ name: bazel on: push: {} pull_request: {} env: CMAKE_GENERATOR: Ninja permissions: contents: read jobs: build_and_test_default: name: bazel.${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: mount bazel cache uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 env: cache-name: bazel-cache with: path: "~/.cache/bazel" key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }} restore-keys: | ${{ env.cache-name }}-${{ matrix.os }}-main - name: build run: | bazel build //:benchmark //:benchmark_main //test/... - name: test run: | bazel test --test_output=all //test/... ================================================ FILE: .github/workflows/build-and-test-min-cmake.yml ================================================ name: build-and-test-min-cmake on: push: branches: [ main ] pull_request: branches: [ main ] env: CMAKE_GENERATOR: Ninja jobs: job: name: ${{ matrix.os }}.min-cmake runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: lukka/get-cmake@b78306120111dc2522750771cfd09ee7ca723687 # latest with: cmakeVersion: 3.13.0 - name: create build environment run: cmake -E make_directory ${{ runner.workspace }}/_build - name: setup cmake initial cache run: touch compiler-cache.cmake - name: configure cmake env: CXX: ${{ matrix.compiler }} shell: bash working-directory: ${{ runner.workspace }}/_build run: > cmake -C ${{ github.workspace }}/compiler-cache.cmake $GITHUB_WORKSPACE -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCMAKE_CXX_VISIBILITY_PRESET=hidden -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON - name: build shell: bash working-directory: 
${{ runner.workspace }}/_build run: cmake --build . ================================================ FILE: .github/workflows/build-and-test-perfcounters.yml ================================================ name: build-and-test-perfcounters on: push: branches: [ main ] pull_request: branches: [ main ] env: CMAKE_GENERATOR: Ninja permissions: contents: read jobs: job: # TODO(dominic): Extend this to include compiler and set through env: CC/CXX. name: ${{ matrix.os }}.${{ matrix.build_type }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] build_type: ['Release', 'Debug'] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: install libpfm run: | sudo apt update sudo apt -y install libpfm4-dev - name: create build environment run: cmake -E make_directory ${{ runner.workspace }}/_build - name: configure cmake shell: bash working-directory: ${{ runner.workspace }}/_build run: > cmake $GITHUB_WORKSPACE -DBENCHMARK_ENABLE_LIBPFM=1 -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - name: build shell: bash working-directory: ${{ runner.workspace }}/_build run: cmake --build . --config ${{ matrix.build_type }} # Skip testing, for now. It seems perf_event_open does not succeed on the # hosting machine, very likely a permissions issue. # TODO(mtrofin): Enable test. 
# - name: test # shell: bash # working-directory: ${{ runner.workspace }}/_build # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure ================================================ FILE: .github/workflows/build-and-test.yml ================================================ name: build-and-test on: push: branches: [ main ] pull_request: branches: [ main ] env: CMAKE_GENERATOR: Ninja jobs: # TODO: add 32-bit builds (g++ and clang++) for ubuntu # (requires g++-multilib and libc6:i386) # TODO: add coverage build (requires lcov) # TODO: add clang + libc++ builds for ubuntu job: name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-24.04, ubuntu-22.04, ubuntu-24.04-arm, macos-latest] build_type: ['Release', 'Debug'] compiler: ['g++', 'clang++'] lib: ['shared', 'static'] steps: - name: Install dependencies (macos) if: runner.os == 'macOS' run: brew install ninja - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: build uses: threeal/cmake-action@725d1314ccf9ea922805d7e3f9d9bcbca892b406 # v2.1.0 with: build-dir: ${{ runner.workspace }}/_build cxx-compiler: ${{ matrix.compiler }} options: | BENCHMARK_DOWNLOAD_DEPENDENCIES=ON BUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} CMAKE_BUILD_TYPE=${{ matrix.build_type }} CMAKE_CXX_COMPILER=${{ matrix.compiler }} CMAKE_CXX_VISIBILITY_PRESET=hidden CMAKE_VISIBILITY_INLINES_HIDDEN=ON - name: test shell: bash working-directory: ${{ runner.workspace }}/_build run: ctest -C ${{ matrix.build_type }} -VV msvc: name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }} runs-on: ${{ matrix.os }} defaults: run: shell: powershell strategy: fail-fast: false matrix: msvc: - VS-17-2025 - VS-17-2022 build_type: - Debug - Release lib: - shared - static include: - msvc: VS-17-2025 os: windows-2025 generator: 'Visual Studio 17 2022' - msvc: VS-17-2022 os: 
windows-2022 generator: 'Visual Studio 17 2022' steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: lukka/get-cmake@b78306120111dc2522750771cfd09ee7ca723687 # latest - name: configure cmake run: > cmake -S . -B ${{ runner.workspace }}/_build/ -G "${{ matrix.generator }}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} - name: build run: cmake --build ${{ runner.workspace }}/_build/ --config ${{ matrix.build_type }} - name: test run: ctest --test-dir ${{ runner.workspace }}/_build/ -C ${{ matrix.build_type }} -VV msys2: name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msys2.msystem }} runs-on: ${{ matrix.os }} defaults: run: shell: msys2 {0} strategy: fail-fast: false matrix: os: [ windows-latest ] msys2: - { msystem: MINGW64, arch: x86_64, family: GNU, compiler: g++ } - { msystem: CLANG64, arch: x86_64, family: LLVM, compiler: clang++ } - { msystem: UCRT64, arch: x86_64, family: GNU, compiler: g++ } build_type: - Debug - Release lib: - shared - static steps: - name: setup msys2 uses: msys2/setup-msys2@4f806de0a5a7294ffabaff804b38a9b435a73bda # v2.30.0 with: cache: false msystem: ${{ matrix.msys2.msystem }} update: true install: >- git base-devel pacboy: >- gcc:p clang:p cmake:p ninja:p - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # NOTE: we can't use cmake actions here as we need to do everything in msys2 shell. - name: configure cmake env: CXX: ${{ matrix.msys2.compiler }} run: > cmake -S . 
-B _build/ -GNinja -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} - name: build run: cmake --build _build/ --config ${{ matrix.build_type }} - name: test working-directory: _build run: ctest -C ${{ matrix.build_type }} -VV ================================================ FILE: .github/workflows/clang-format-lint.yml ================================================ name: clang-format-lint on: push: {} pull_request: {} env: CMAKE_GENERATOR: Ninja permissions: contents: read jobs: job: name: check-clang-format runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: DoozyX/clang-format-lint-action@bcb4eb2cb0d707ee4f3e5cc3b456eb075f12cf73 # v0.20 with: source: './include/benchmark ./src ./test ./bindings' clangFormatVersion: 18 ================================================ FILE: .github/workflows/clang-tidy-lint.yml ================================================ name: clang-tidy on: push: {} pull_request: {} env: CMAKE_GENERATOR: Ninja permissions: contents: read jobs: job: name: run-clang-tidy runs-on: ubuntu-latest strategy: fail-fast: false steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: install clang-tidy run: sudo apt update && sudo apt -y install clang-tidy - name: create build environment run: cmake -E make_directory ${{ github.workspace }}/_build - name: configure cmake shell: bash working-directory: ${{ github.workspace }}/_build run: > cmake $GITHUB_WORKSPACE -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF -DBENCHMARK_ENABLE_LIBPFM=OFF -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DGTEST_COMPILE_COMMANDS=OFF - name: run shell: bash working-directory: ${{ github.workspace }}/_build run: run-clang-tidy -config-file=$GITHUB_WORKSPACE/.clang-tidy ================================================ FILE: .github/workflows/doxygen.yml 
================================================ name: doxygen on: push: branches: [main] pull_request: branches: [main] env: CMAKE_GENERATOR: Ninja permissions: contents: read jobs: build-and-deploy: name: Build HTML documentation runs-on: ubuntu-latest steps: - name: Fetching sources uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Installing build dependencies run: | sudo apt update sudo apt install doxygen gcc git - name: Creating build directory run: mkdir build - name: Building HTML documentation with Doxygen run: | cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON cmake --build build --target benchmark_doxygen ================================================ FILE: .github/workflows/ossf.yml ================================================ name: OSSF Scorecard Weekly on: schedule: - cron: '0 0 * * 0' # Runs every Sunday at midnight UTC workflow_dispatch: permissions: contents: read jobs: ossf-scorecard: # To write a badge permissions: id-token: write runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Run analysis uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 with: publish_results: true results_file: ossf_scorecard.json results_format: json ================================================ FILE: .github/workflows/pre-commit.yml ================================================ name: python + Bazel pre-commit checks on: push: branches: [ main ] pull_request: branches: [ main ] jobs: pre-commit: runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Install uv uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0 with: python-version: 3.12 - name: Run pre-commit checks run: uv run --only-group=dev pre-commit run --all-files 
--verbose --show-diff-on-failure ================================================ FILE: .github/workflows/sanitizer.yml ================================================ name: sanitizer on: push: {} pull_request: {} env: CMAKE_GENERATOR: Ninja UBSAN_OPTIONS: "print_stacktrace=1" jobs: job: name: ${{ matrix.sanitizer }}.${{ matrix.build_type }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: build_type: ['Debug', 'RelWithDebInfo'] sanitizer: ['asan', 'ubsan', 'tsan', 'msan'] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: configure msan env if: matrix.sanitizer == 'msan' run: | echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV - name: configure ubsan env if: matrix.sanitizer == 'ubsan' run: | echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV - name: configure asan env if: matrix.sanitizer == 'asan' run: | echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV - name: configure tsan env if: matrix.sanitizer == 'tsan' run: | echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV - name: fine-tune asan options # in asan we get an error from std::regex. ignore it. 
if: matrix.sanitizer == 'asan' run: | echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV - name: setup clang uses: egor-tensin/setup-clang@471a6f8ef1d449dba8e1a51780e7f943572a3f99 # v2.1 with: version: latest platform: x64 - name: configure clang run: | echo "CC=cc" >> $GITHUB_ENV echo "CXX=c++" >> $GITHUB_ENV - name: build libc++ (non-asan) if: matrix.sanitizer != 'asan' run: | "${GITHUB_WORKSPACE}/.github/libcxx-setup.sh" echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -I${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV - name: create build environment run: cmake -E make_directory ${{ runner.workspace }}/_build - name: configure cmake shell: bash working-directory: ${{ runner.workspace }}/_build run: > VERBOSE=1 cmake -GNinja $GITHUB_WORKSPACE -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF -DBENCHMARK_ENABLE_LIBPFM=OFF -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCMAKE_C_COMPILER=${{ env.CC }} -DCMAKE_CXX_COMPILER=${{ env.CXX }} -DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}" -DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}" -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - name: build shell: bash working-directory: ${{ runner.workspace }}/_build run: cmake --build . 
--config ${{ matrix.build_type }} - name: test shell: bash working-directory: ${{ runner.workspace }}/_build run: ctest -C ${{ matrix.build_type }} -VV ================================================ FILE: .github/workflows/test_bindings.yml ================================================ name: test-bindings on: push: branches: [main] pull_request: branches: [main] env: CMAKE_GENERATOR: Ninja permissions: contents: read jobs: python_bindings: name: Test GBM Python ${{ matrix.python-version }} bindings on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ ubuntu-latest, macos-latest, windows-latest ] python-version: [ "3.10", "3.11", "3.12", "3.13" ] steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python-version }} - name: Install GBM Python bindings on ${{ matrix.os }} run: python -m pip install . 
- name: Run example on ${{ matrix.os }} under Python ${{ matrix.python-version }} run: python bindings/python/google_benchmark/example.py ================================================ FILE: .github/workflows/wheels.yml ================================================ name: Build and upload Python wheels on: workflow_dispatch: release: types: - published env: CMAKE_GENERATOR: Ninja jobs: build_sdist: name: Build source distribution runs-on: ubuntu-latest steps: - name: Check out repo uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - name: Install Python 3.12 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" - run: python -m pip install build - name: Build sdist run: python -m build --sdist - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: dist-sdist path: dist/*.tar.gz build_wheels: name: Build Google Benchmark wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, ubuntu-24.04-arm, macos-15-intel, macos-latest, windows-latest] steps: - name: Check out Google Benchmark uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 name: Install Python 3.12 with: python-version: "3.12" - name: Install the latest version of uv uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0 - name: Build wheels on ${{ matrix.os }} using cibuildwheel uses: pypa/cibuildwheel@ee02a1537ce3071a004a6b08c41e72f0fdc42d9a # v3.4.0 env: CIBW_BUILD: "cp310-* cp311-* cp312-*" CIBW_BUILD_FRONTEND: "build[uv]" CIBW_SKIP: "*-musllinux_*" CIBW_ARCHS: auto64 CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh # Grab the rootless Bazel installation inside the container. 
CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py # unused by Bazel, but needed explicitly by delocate on MacOS. MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os == 'macos-15-intel' && 10.14 || 11.0 }} - name: Upload Google Benchmark ${{ matrix.os }} wheels uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: dist-${{ matrix.os }} path: wheelhouse/*.whl pypi_upload: name: Publish google-benchmark wheels to PyPI needs: [build_sdist, build_wheels] runs-on: ubuntu-latest permissions: id-token: write steps: - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: dist pattern: dist-* merge-multiple: true - uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1 ================================================ FILE: .gitignore ================================================ *.a *.so *.so.?* *.dll *.exe *.dylib *.cmake !/cmake/*.cmake !/test/AssemblyTests.cmake *~ *.swp *.pyc __pycache__ .DS_Store # lcov *.lcov /lcov # cmake files. /Testing CMakeCache.txt CMakeFiles/ cmake_install.cmake # makefiles. Makefile # in-source build. bin/ lib/ /test/*_test # exuberant ctags. tags # YouCompleteMe configuration. .ycm_extra_conf.pyc # ninja generated files. .ninja_deps .ninja_log build.ninja install_manifest.txt rules.ninja # bazel output symlinks. bazel-* MODULE.bazel.lock # out-of-source build top-level folders. 
build/ _build/ build*/ # in-source dependencies /googletest/ # Visual Studio 2015/2017 cache/options directory .vs/ CMakeSettings.json # Visual Studio Code cache/options directory .vscode/ # Python build stuff dist/ *.egg-info* uv.lock ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/keith/pre-commit-buildifier rev: 8.2.1 hooks: - id: buildifier - id: buildifier-lint - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.18.2 hooks: - id: mypy types_or: [ python, pyi ] args: [ "--ignore-missing-imports", "--scripts-are-modules" ] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.0 hooks: - id: ruff-check args: [ --fix, --exit-non-zero-on-fix ] - id: ruff-format ================================================ FILE: .ycm_extra_conf.py ================================================ import os import ycm_core # These are the compilation flags that will be used in case there's no # compilation database set (by default, one is not set). # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. flags = [ "-Wall", "-Werror", "-pedantic-errors", "-std=c++0x", "-fno-strict-aliasing", "-O3", "-DNDEBUG", # ...and the same thing goes for the magic -x option which specifies the # language that the files to be compiled are written in. This is mostly # relevant for c++ headers. # For a C project, you would set this to 'c' instead of 'c++'. "-x", "c++", "-I", "include", "-isystem", "/usr/include", "-isystem", "/usr/local/include", ] # Set this to the absolute path to the folder (NOT the file!) containing the # compile_commands.json file to use that instead of 'flags'. See here for # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html # # Most projects will NOT need to set this to anything; you can just change the # 'flags' list of compilation flags. Notice that YCM itself uses that approach. 
compilation_database_folder = "" if os.path.exists(compilation_database_folder): database = ycm_core.CompilationDatabase(compilation_database_folder) else: database = None SOURCE_EXTENSIONS = [".cc"] def DirectoryOfThisScript(): return os.path.dirname(os.path.abspath(__file__)) def MakeRelativePathsInFlagsAbsolute(flags, working_directory): if not working_directory: return list(flags) new_flags = [] make_next_absolute = False path_flags = ["-isystem", "-I", "-iquote", "--sysroot="] for flag in flags: new_flag = flag if make_next_absolute: make_next_absolute = False if not flag.startswith("/"): new_flag = os.path.join(working_directory, flag) for path_flag in path_flags: if flag == path_flag: make_next_absolute = True break if flag.startswith(path_flag): path = flag[len(path_flag) :] new_flag = path_flag + os.path.join(working_directory, path) break if new_flag: new_flags.append(new_flag) return new_flags def IsHeaderFile(filename): extension = os.path.splitext(filename)[1] return extension in [".h", ".hxx", ".hpp", ".hh"] def GetCompilationInfoForFile(filename): # The compilation_commands.json file generated by CMake does not have # entries for header files. So we do our best by asking the db for flags for # a corresponding source file, if any. If one exists, the flags for that # file should be good enough. 
if IsHeaderFile(filename): basename = os.path.splitext(filename)[0] for extension in SOURCE_EXTENSIONS: replacement_file = basename + extension if os.path.exists(replacement_file): compilation_info = database.GetCompilationInfoForFile( replacement_file ) if compilation_info.compiler_flags_: return compilation_info return None return database.GetCompilationInfoForFile(filename) def FlagsForFile(filename, **kwargs): if database: # Bear in mind that compilation_info.compiler_flags_ does NOT return a # python list, but a "list-like" StringVec object compilation_info = GetCompilationInfoForFile(filename) if not compilation_info: return None final_flags = MakeRelativePathsInFlagsAbsolute( compilation_info.compiler_flags_, compilation_info.compiler_working_dir_, ) else: relative_to = DirectoryOfThisScript() final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to) return {"flags": final_flags, "do_cache": True} ================================================ FILE: AUTHORS ================================================ # This is the official list of benchmark authors for copyright purposes. # This file is distinct from the CONTRIBUTORS files. # See the latter for an explanation. # # Names should be added to this file as: # Name or Organization # The email address is not required for organizations. # # Please keep the list sorted. Albert Pretorius Alex Steele Andriy Berestovskyy Arne Beer Benjamin King Carto Cezary Skrzyński Christian Wassermann Christopher Seymour Colin Braley Daniel Harvey David Coeurjolly Deniz Evrenci Dirac Research Dominik Czarnota Dominik Korman Donald Aingworth Eric Backus Eric Fiselier Eugene Zhuk Evgeny Safronov Fabien Pichot Federico Ficarelli Felix Homann Gergely Meszaros Gergő Szitár Google Inc. 
Henrique Bucher International Business Machines Corporation Ismael Jimenez Martinez Jern-Kuan Leong JianXiong Zhou Joao Paulo Magalhaes Jordan Williams Jussi Knuuttila Kaito Udagawa Kishan Kumar Kostiantyn Lazukin Lei Xu Marcel Jacobse Matt Clarkson Maxim Vafin Mike Apodaca Min-Yih Hsu MongoDB Inc. Nick Hutchinson Norman Heino Oleksandr Sochka Olga Fadeeva Ori Livneh Paul Redmond Prithvi Rao Radoslav Yovchev Raghu Raja Rainer Orth Roman Lebedev Sayan Bhattacharjee Shapr3D Shashank Thakur Shuo Chen Staffan Tjernstrom Steinar H. Gunderson Stripe, Inc. Tobias Schmidt Yixuan Qiu Yusuke Suzuki Zbigniew Skowron ================================================ FILE: BUILD.bazel ================================================ load("@rules_cc//cc:defs.bzl", "cc_library") licenses(["notice"]) COPTS = [ "-pedantic", "-pedantic-errors", "-std=c++17", "-Wall", "-Wconversion", "-Wextra", "-Wshadow", # "-Wshorten-64-to-32", "-Wfloat-equal", "-Wformat=2", "-fstrict-aliasing", ## assert() are used a lot in tests upstream, which may be optimised out leading to ## unused-variable warning. 
"-Wno-unused-variable", "-Werror=old-style-cast", ] MSVC_COPTS = [ "/std:c++17", ] config_setting( name = "windows", constraint_values = ["@platforms//os:windows"], visibility = [":__subpackages__"], ) config_setting( name = "perfcounters", define_values = { "pfm": "1", }, visibility = [":__subpackages__"], ) cc_library( name = "benchmark", srcs = glob( [ "src/*.cc", "src/*.h", ], exclude = ["src/benchmark_main.cc"], ), hdrs = [ "include/benchmark/benchmark.h", "include/benchmark/benchmark_api.h", "include/benchmark/counter.h", "include/benchmark/export.h", "include/benchmark/macros.h", "include/benchmark/managers.h", "include/benchmark/registration.h", "include/benchmark/reporter.h", "include/benchmark/state.h", "include/benchmark/statistics.h", "include/benchmark/sysinfo.h", "include/benchmark/types.h", "include/benchmark/utils.h", ], copts = select({ ":windows": MSVC_COPTS, "//conditions:default": COPTS, }), defines = [ "BENCHMARK_STATIC_DEFINE", "BENCHMARK_VERSION=\\\"" + (module_version() if module_version() != None else "") + "\\\"", ] + select({ ":perfcounters": ["HAVE_LIBPFM"], "//conditions:default": [], }), includes = ["include"], linkopts = select({ ":windows": ["-DEFAULTLIB:shlwapi.lib"], "//conditions:default": ["-pthread"], }), # Only static linking is allowed; no .so will be produced. # Using `defines` (i.e. not `local_defines`) means that no # dependent rules need to bother about defining the macro. 
linkstatic = True, local_defines = [ # Turn on Large-file Support "_FILE_OFFSET_BITS=64", "_LARGEFILE64_SOURCE", "_LARGEFILE_SOURCE", ], visibility = ["//visibility:public"], deps = select({ ":perfcounters": ["@libpfm"], "//conditions:default": [], }), ) cc_library( name = "benchmark_main", srcs = ["src/benchmark_main.cc"], hdrs = [ "include/benchmark/benchmark.h", "include/benchmark/benchmark_api.h", "include/benchmark/counter.h", "include/benchmark/export.h", "include/benchmark/macros.h", "include/benchmark/managers.h", "include/benchmark/registration.h", "include/benchmark/reporter.h", "include/benchmark/state.h", "include/benchmark/statistics.h", "include/benchmark/sysinfo.h", "include/benchmark/types.h", "include/benchmark/utils.h", ], includes = ["include"], visibility = ["//visibility:public"], deps = [":benchmark"], ) cc_library( name = "benchmark_internal_headers", hdrs = glob(["src/*.h"]), visibility = ["//test:__pkg__"], ) ================================================ FILE: CMakeLists.txt ================================================ # Require CMake 3.13. If available, use the policies up to CMake 3.22. cmake_minimum_required (VERSION 3.13...3.22) project (benchmark VERSION 1.9.5 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF) option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON) option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") # PGC++ may be reporting false positives.
set(BENCHMARK_ENABLE_WERROR OFF) endif() if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC") set(BENCHMARK_ENABLE_WERROR OFF) endif() if(BENCHMARK_FORCE_WERROR) set(BENCHMARK_ENABLE_WERROR ON) endif(BENCHMARK_FORCE_WERROR) if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")) option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF) else() set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE) endif() option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON) option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF) option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON) option(BENCHMARK_INSTALL_TOOLS "Enable installation of tools." ON) # Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which # may require downloading the source code. option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree building of unmet dependencies" OFF) # This option can be used to disable building and running unit tests which depend on gtest # in cases where it is not possible to build or find a valid version of gtest. option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON) option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON) option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) # Export only public symbols set(CMAKE_CXX_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the # undocumented, but working variable. 
# See https://gitlab.kitware.com/cmake/cmake/-/issues/15170 set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID}) if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM") set(CMAKE_CROSSCOMPILING TRUE) endif() endif() set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF) function(should_enable_assembly_tests) if(CMAKE_BUILD_TYPE) string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") # FIXME: The --coverage flag needs to be removed when building assembly # tests for this to work. return() endif() endif() if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC") return() elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") return() elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) # FIXME: Make these work on 32 bit builds return() elseif(BENCHMARK_BUILD_32_BITS) # FIXME: Make these work on 32 bit builds return() endif() find_program(LLVM_FILECHECK_EXE FileCheck) if (LLVM_FILECHECK_EXE) set(LLVM_FILECHECK_EXE "${LLVM_FILECHECK_EXE}" CACHE PATH "llvm filecheck" FORCE) message(STATUS "LLVM FileCheck Found: ${LLVM_FILECHECK_EXE}") else() message(STATUS "Failed to find LLVM FileCheck") return() endif() set(ENABLE_ASSEMBLY_TESTS_DEFAULT ON PARENT_SCOPE) endfunction() should_enable_assembly_tests() # This option disables the building and running of the assembly verification tests option(BENCHMARK_ENABLE_ASSEMBLY_TESTS "Enable building and running the assembly tests" ${ENABLE_ASSEMBLY_TESTS_DEFAULT}) # Make sure we can import our CMake functions list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # Read the git tags to determine the project version include(GetGitVersion) get_git_version(GIT_VERSION) # If no git version can be determined, use the version # from the project() command if ("${GIT_VERSION}" STREQUAL "v0.0.0") set(VERSION "v${benchmark_VERSION}") else() set(VERSION "${GIT_VERSION}") endif() # Normalize version: drop "v" prefix, replace first "-" with ".", # drop
everything after second "-" (including said "-"). string(STRIP ${VERSION} VERSION) if(VERSION MATCHES v[^-]*-) string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" NORMALIZED_VERSION ${VERSION}) else() string(REGEX REPLACE "v(.*)" "\\1" NORMALIZED_VERSION ${VERSION}) endif() # Tell the user what versions we are using message(STATUS "Google Benchmark version: ${VERSION}, normalized to ${NORMALIZED_VERSION}") # The version of the libraries set(GENERIC_LIB_VERSION ${NORMALIZED_VERSION}) string(SUBSTRING ${NORMALIZED_VERSION} 0 1 GENERIC_LIB_SOVERSION) # Import our CMake modules include(AddCXXCompilerFlag) include(CheckCXXCompilerFlag) include(CheckLibraryExists) include(CXXFeatureCheck) # Check for rt library, but explicitly disable for QNX if(QNXNTO) set(HAVE_LIB_RT FALSE) else() check_library_exists(rt shm_open "" HAVE_LIB_RT) endif() if (BENCHMARK_BUILD_32_BITS) add_required_cxx_compiler_flag(-m32) endif() set(BENCHMARK_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD}) set(CMAKE_CXX_STANDARD_REQUIRED YES) set(CMAKE_CXX_EXTENSIONS OFF) if (MSVC) # Turn compiler warnings up to 11 string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") # MP flag only applies to cl, not cl frontends to other compilers (e.g. 
clang-cl, icx-cl etc) if(CMAKE_CXX_COMPILER_ID MATCHES MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") endif() add_definitions(-D_CRT_SECURE_NO_WARNINGS) if(BENCHMARK_ENABLE_WERROR) add_cxx_compiler_flag(-WX) endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-EHs-) add_cxx_compiler_flag(-EHa-) add_definitions(-D_HAS_EXCEPTIONS=0) endif() # Link time optimisation if (BENCHMARK_ENABLE_LTO) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL") set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /GL") string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO}") set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}") set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}") set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /GL") set(CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL "${CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL} /LTCG") set(CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL "${CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL} /LTCG") set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") endif() else() # Turn on Large-file Support add_definitions(-D_FILE_OFFSET_BITS=64) add_definitions(-D_LARGEFILE64_SOURCE) 
add_definitions(-D_LARGEFILE_SOURCE) # Turn compiler warnings up to 11 add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) add_cxx_compiler_flag(-Wfloat-equal) add_cxx_compiler_flag(-Wold-style-cast) add_cxx_compiler_flag(-Wconversion) add_cxx_compiler_flag(-Wformat=2) if(BENCHMARK_ENABLE_WERROR) add_cxx_compiler_flag(-Werror) endif() if (NOT BENCHMARK_ENABLE_TESTING) # Disable warning when compiling tests as gtest does not use 'override'. add_cxx_compiler_flag(-Wsuggest-override) endif() add_cxx_compiler_flag(-pedantic) add_cxx_compiler_flag(-pedantic-errors) add_cxx_compiler_flag(-Wshorten-64-to-32) add_cxx_compiler_flag(-fstrict-aliasing) # Disable warnings regarding deprecated parts of the library while building # and testing those parts of the library. add_cxx_compiler_flag(-Wno-deprecated-declarations) if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") # Intel silently ignores '-Wno-deprecated-declarations', # warning no. 1786 must be explicitly disabled. # See #631 for rationale. add_cxx_compiler_flag(-wd1786) add_cxx_compiler_flag(-fno-finite-math-only) # ICC17u2: overloaded virtual function "benchmark::Fixture::SetUp" is only partially # overridden (because of deprecated overload) add_cxx_compiler_flag(-wd654) endif() # Disable deprecation warnings for release builds (when -Werror is enabled). 
if(BENCHMARK_ENABLE_WERROR) add_cxx_compiler_flag(-Wno-deprecated) endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-fno-exceptions) endif() if (HAVE_CXX_FLAG_FSTRICT_ALIASING) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing add_cxx_compiler_flag(-Wstrict-aliasing) endif() endif() add_cxx_compiler_flag(-Wthread-safety) if (HAVE_CXX_FLAG_WTHREAD_SAFETY) cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include") endif() # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a # predefined macro, which turns on all of the wonderful libc extensions. # However g++ doesn't do this in Cygwin so we have to define it ourselves # since we depend on GNU/POSIX/BSD extensions. if (CYGWIN) add_definitions(-D_GNU_SOURCE=1) endif() if (QNXNTO) add_definitions(-D_QNX_SOURCE) endif() # Link time optimisation if (BENCHMARK_ENABLE_LTO) add_cxx_compiler_flag(-flto) add_cxx_compiler_flag(-Wno-lto-type-mismatch) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") find_program(GCC_AR gcc-ar) if (GCC_AR) set(CMAKE_AR ${GCC_AR}) endif() find_program(GCC_RANLIB gcc-ranlib) if (GCC_RANLIB) set(CMAKE_RANLIB ${GCC_RANLIB}) endif() elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") include(llvm-toolchain) endif() endif() # Coverage build type set(BENCHMARK_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the C++ compiler during coverage builds." FORCE) set(BENCHMARK_EXE_LINKER_FLAGS_COVERAGE "${CMAKE_EXE_LINKER_FLAGS_DEBUG}" CACHE STRING "Flags used for linking binaries during coverage builds." FORCE) set(BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" CACHE STRING "Flags used by the shared libraries linker during coverage builds." 
FORCE) mark_as_advanced( BENCHMARK_CXX_FLAGS_COVERAGE BENCHMARK_EXE_LINKER_FLAGS_COVERAGE BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE) set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage.") add_cxx_compiler_flag(--coverage COVERAGE) endif() if (BENCHMARK_USE_LIBCXX) if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") add_cxx_compiler_flag(-stdlib=libc++) elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM") add_cxx_compiler_flag(-nostdinc++) message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS") # Adding -nodefaultlibs directly to CMAKE__LINKER_FLAGS will break # configuration checks such as 'find_package(Threads)' list(APPEND BENCHMARK_CXX_LINKER_FLAGS -nodefaultlibs) # -lc++ cannot be added directly to CMAKE__LINKER_FLAGS because # linker flags appear before all linker inputs and -lc++ must appear after. list(APPEND BENCHMARK_CXX_LIBRARIES c++) else() message(FATAL_ERROR "-DBENCHMARK_USE_LIBCXX:BOOL=ON is not supported for compiler") endif() endif(BENCHMARK_USE_LIBCXX) # C++ feature checks # Determine the correct regular expression engine to use cxx_feature_check(STD_REGEX) cxx_feature_check(GNU_POSIX_REGEX) cxx_feature_check(POSIX_REGEX) if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) message(WARNING "Using std::regex with exceptions disabled is not fully supported") endif() cxx_feature_check(STEADY_CLOCK) # Ensure we have pthreads set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) # cxx_feature_check relies on try_run to probe compiler features. 
Because this # check does not produce a real target, target_link_libraries cannot be applied. # Therefore, link libraries are forwarded to try_run through # BENCHMARK_CXX_LIBRARIES. list(APPEND BENCHMARK_CXX_LIBRARIES Threads::Threads) cxx_feature_check(PTHREAD_AFFINITY) if (BENCHMARK_ENABLE_LIBPFM) find_package(PFM REQUIRED) endif() # Set up directories include_directories(${PROJECT_SOURCE_DIR}/include) # Build the targets add_subdirectory(src) if (BENCHMARK_ENABLE_TESTING) enable_testing() if (BENCHMARK_ENABLE_GTEST_TESTS AND NOT (TARGET gtest AND TARGET gtest_main AND TARGET gmock AND TARGET gmock_main)) if (BENCHMARK_USE_BUNDLED_GTEST) include(GoogleTest) else() find_package(GTest CONFIG REQUIRED) add_library(gtest ALIAS GTest::gtest) add_library(gtest_main ALIAS GTest::gtest_main) add_library(gmock ALIAS GTest::gmock) add_library(gmock_main ALIAS GTest::gmock_main) endif() endif() add_subdirectory(test) endif() ================================================ FILE: CONTRIBUTING.md ================================================ # How to contribute # We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement ## Contributions to any Google project must be accompanied by a Contributor License Agreement. This is not a copyright **assignment**, it simply gives Google permission to use and redistribute your contributions as part of the project. * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA][]. * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA][]. You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again.
[individual CLA]: https://developers.google.com/open-source/cla/individual [corporate CLA]: https://developers.google.com/open-source/cla/corporate Once your CLA is submitted (or if you already submitted one for another Google project), make a commit adding yourself to the [AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part of your first [pull request][]. [AUTHORS]: AUTHORS [CONTRIBUTORS]: CONTRIBUTORS ## Submitting a patch ## 1. It's generally best to start by opening a new issue describing the bug or feature you're intending to fix. Even if you think it's relatively minor, it's helpful to know what people are working on. Mention in the initial issue that you are planning to work on that bug or feature so that it can be assigned to you. 1. Follow the normal process of [forking][] the project, and setup a new branch to work in. It's important that each group of changes be done in separate branches in order to ensure that a pull request only includes the commits related to that bug or feature. 1. Do your best to have [well-formed commit messages][] for each change. This provides consistency throughout the project, and ensures that commit messages are able to be formatted properly by various git tools. 1. Finally, push the commits to your fork and submit a [pull request][]. [forking]: https://help.github.com/articles/fork-a-repo [well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html [pull request]: https://help.github.com/articles/creating-a-pull-request ================================================ FILE: CONTRIBUTORS ================================================ # People who have agreed to one of the CLAs and can contribute patches. # The AUTHORS file lists the copyright holders; this file # lists people. For example, Google employees are listed here # but not in AUTHORS, because Google holds the copyright. 
# # Names should be added to this file only after verifying that # the individual or the individual's organization has agreed to # the appropriate Contributor License Agreement, found here: # # https://developers.google.com/open-source/cla/individual # https://developers.google.com/open-source/cla/corporate # # The agreement for individuals can be filled out on the web. # # When adding J Random Contributor's name to this file, # either J's name or J's organization's name should be # added to the AUTHORS file, depending on whether the # individual or corporate CLA was used. # # Names should be added to this file as: # Name # # Please keep the list sorted. Abhina Sreeskantharajan Albert Pretorius Alex Steele Andriy Berestovskyy Arne Beer Bátor Tallér Benjamin King Billy Robert O'Neal III Cezary Skrzyński Chris Kennelly Christian Wassermann Christopher Seymour Colin Braley Cyrille Faucheux Daniel Harvey David Coeurjolly Deniz Evrenci Dominic Hamon Dominik Czarnota Dominik Korman Donald Aingworth Doug Evans Eric Backus Eric Fiselier Eugene Zhuk Evgeny Safronov Fabien Pichot Fanbo Meng Federico Ficarelli Felix Homann Geoffrey Martin-Noble Gergely Meszaros Gergő Szitár Hannes Hauswedell Henrique Bucher Ismael Jimenez Martinez Iakov Sergeev Jern-Kuan Leong JianXiong Zhou Joao Paulo Magalhaes John Millikin Jordan Williams Jussi Knuuttila Kaito Udagawa Kai Wolf Kishan Kumar Kostiantyn Lazukin Lei Xu Marcel Jacobse Matt Clarkson Maxim Vafin Mike Apodaca Min-Yih Hsu Nick Hutchinson Norman Heino Oleksandr Sochka Olga Fadeeva Ori Livneh Pascal Leroy Paul Redmond Pierre Phaneuf Prithvi Rao Radoslav Yovchev Raghu Raja Rainer Orth Raul Marin Ray Glover Robert Guo Roman Lebedev Sayan Bhattacharjee Shashank Thakur ShengYi Hung Shuo Chen Steven Wan Tobias Schmidt Tobias Ulvgård Tom Madams Yixuan Qiu Yusuke Suzuki Zbigniew Skowron ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 
http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MODULE.bazel ================================================ module( name = "google_benchmark", version = "1.9.5", ) bazel_dep(name = "bazel_skylib", version = "1.7.1") bazel_dep(name = "platforms", version = "0.0.10") bazel_dep(name = "rules_cc", version = "0.0.9") bazel_dep(name = "rules_python", version = "1.0.0", dev_dependency = True) bazel_dep(name = "googletest", version = "1.14.0", dev_dependency = True, repo_name = "com_google_googletest") bazel_dep(name = "libpfm", version = "4.11.0.bcr.1") # Register toolchains for Python 3.8 through 3.13, with 3.12 as the default # version. # Registering them explicitly makes it possible to build Python hermetically instead # of relying on the changing default version from rules_python. 
python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True) python.toolchain(python_version = "3.8") python.toolchain(python_version = "3.9") python.toolchain(python_version = "3.10") python.toolchain(python_version = "3.11") python.toolchain( is_default = True, python_version = "3.12", ) python.toolchain(python_version = "3.13") pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) pip.parse( hub_name = "tools_pip_deps", python_version = "3.12", requirements_lock = "//tools:requirements.txt", ) use_repo(pip, "tools_pip_deps") # -- bazel_dep definitions -- # bazel_dep(name = "nanobind_bazel", version = "2.9.2", dev_dependency = True) ================================================ FILE: README.md ================================================ # Benchmark [![build-and-test](https://github.com/google/benchmark/workflows/build-and-test/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Abuild-and-test) [![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml) [![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/google/benchmark/badge)](https://securityscorecards.dev/viewer/?uri=github.com/google/benchmark) [![Discord](https://discordapp.com/api/guilds/1125694995928719494/widget.png?style=shield)](https://discord.gg/cz7UX7wKC2) A library to benchmark code snippets, similar to unit tests. 
Example: ```c++ #include #include static void BM_SomeFunction(benchmark::State& state) { // Perform setup here for (auto _ : state) { // This code gets timed SomeFunction(); } } // Register the function as a benchmark BENCHMARK(BM_SomeFunction); // Run the benchmark BENCHMARK_MAIN(); ``` ## Getting Started To get started, see [Requirements](#requirements) and [Installation](#installation). See [Usage](#usage) for a full example and the [User Guide](docs/user_guide.md) for a more comprehensive feature overview. It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md) as some of the structural aspects of the APIs are similar. ## Resources [Discussion group](https://groups.google.com/d/forum/benchmark-discuss) IRC channels: * [libera](https://libera.chat) #benchmark [Additional Tooling Documentation](docs/tools.md) [Assembly Testing Documentation](docs/AssemblyTests.md) [Building and installing Python bindings](docs/python_bindings.md) ## Requirements The library can be used with C++11. However, it requires C++17 to build, including compiler and standard library support. _See [dependencies.md](docs/dependencies.md) for more details regarding supported compilers and standards._ If you have need for a particular compiler to be supported, patches are very welcome. See [Platform-Specific Build Instructions](docs/platform_specific_build_instructions.md). ## Installation This describes the installation process using cmake. As pre-requisites, you'll need git and cmake installed. _See [dependencies.md](docs/dependencies.md) for more details regarding supported versions of build tools._ ```bash # Check out the library. $ git clone https://github.com/google/benchmark.git # Go to the library root directory $ cd benchmark # Make a build directory to place the build output. $ cmake -E make_directory "build" # Generate build system files with cmake, and download any dependencies. 
$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../ # or, starting with CMake 3.13, use a simpler form: # cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release -S . -B "build" # Build the library. $ cmake --build "build" --config Release ``` This builds the `benchmark` and `benchmark_main` libraries and tests. On a unix system, the build directory should now look something like this: ``` /benchmark /build /src /libbenchmark.a /libbenchmark_main.a /test ... ``` Next, you can run the tests to check the build. ```bash $ cmake -E chdir "build" ctest --build-config Release ``` If you want to install the library globally, also run: ``` sudo cmake --build "build" --config Release --target install ``` Note that Google Benchmark requires Google Test to build and run the tests. This dependency can be provided two ways: * Checkout the Google Test sources into `benchmark/googletest`. * Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during configuration as above, the library will automatically download and build any required dependencies. If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF` to `CMAKE_ARGS`. ### Debug vs Release By default, benchmark builds as a debug library. You will see a warning in the output when this is the case. To build it as a release library instead, add `-DCMAKE_BUILD_TYPE=Release` when generating the build system files, as shown above. The use of `--config Release` in build commands is needed to properly support multi-configuration tools (like Visual Studio for example) and can be skipped for other build systems (like Makefile). To enable link-time optimisation, also add `-DBENCHMARK_ENABLE_LTO=true` when generating the build system files. If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake cache variables, if autodetection fails. 
If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. To enable sanitizer checks (e.g., `asan` and `tsan`), add: ``` -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all" -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all " ``` ### Stable and Experimental Library Versions The main branch contains the latest stable version of the benchmarking library; the API of which can be considered largely stable, with source-breaking changes being made only upon the release of a new major version. Newer, experimental features are implemented and tested on the [`v2` branch](https://github.com/google/benchmark/tree/v2). Users who wish to use, test, and provide feedback on the new features are encouraged to try this branch. However, this branch provides no stability guarantees and reserves the right to change and break the API at any time. ## Usage ### Basic usage Define a function that executes the code to measure, register it as a benchmark function using the `BENCHMARK` macro, and ensure an appropriate `main` function is available: ```c++ #include <benchmark/benchmark.h> static void BM_StringCreation(benchmark::State& state) { for (auto _ : state) std::string empty_string; } // Register the function as a benchmark BENCHMARK(BM_StringCreation); // Define another benchmark static void BM_StringCopy(benchmark::State& state) { std::string x = "hello"; for (auto _ : state) std::string copy(x); } BENCHMARK(BM_StringCopy); BENCHMARK_MAIN(); ``` To run the benchmark, compile and link against the `benchmark` library (libbenchmark.a/.so). If you followed the build steps above, this library will be under the build directory you created. ```bash # Example on linux after running the build steps above. Assumes the # `benchmark` and `build` directories are under the current directory. 
$ g++ mybenchmark.cc -std=c++11 -isystem benchmark/include \ -Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark ``` Alternatively, link against the `benchmark_main` library and remove `BENCHMARK_MAIN();` above to get the same behavior. The compiled executable will run all benchmarks by default. Pass the `--help` flag for option information or see the [User Guide](docs/user_guide.md). ### Usage with CMake If using CMake, it is recommended to link against the project-provided `benchmark::benchmark` and `benchmark::benchmark_main` targets using `target_link_libraries`. It is possible to use ```find_package``` to import an installed version of the library. ```cmake find_package(benchmark REQUIRED) ``` Alternatively, ```add_subdirectory``` will incorporate the library directly in to one's CMake project. ```cmake add_subdirectory(benchmark) ``` Either way, link to the library as follows. ```cmake target_link_libraries(MyTarget benchmark::benchmark) ``` ================================================ FILE: WORKSPACE ================================================ workspace(name = "com_github_google_benchmark") load("//:bazel/benchmark_deps.bzl", "benchmark_deps") benchmark_deps() load("@rules_python//python:repositories.bzl", "py_repositories") py_repositories() load("@rules_python//python:pip.bzl", "pip_parse") pip_parse( name = "tools_pip_deps", requirements_lock = "//tools:requirements.txt", ) load("@tools_pip_deps//:requirements.bzl", "install_deps") install_deps() ================================================ FILE: WORKSPACE.bzlmod ================================================ # This file marks the root of the Bazel workspace. # See MODULE.bazel for dependencies and setup. 
================================================ FILE: _config.yml ================================================ theme: jekyll-theme-midnight markdown: GFM ================================================ FILE: appveyor.yml ================================================ version: '{build}' image: Visual Studio 2017 configuration: - Debug - Release environment: matrix: - compiler: msvc-15-seh generator: "Visual Studio 15 2017" - compiler: msvc-15-seh generator: "Visual Studio 15 2017 Win64" - compiler: msvc-14-seh generator: "Visual Studio 14 2015" - compiler: msvc-14-seh generator: "Visual Studio 14 2015 Win64" - compiler: gcc-5.3.0-posix generator: "MinGW Makefiles" cxx_path: 'C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin' APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 matrix: fast_finish: true install: # git bash conflicts with MinGW makefiles - if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files\Git\usr\bin;=%") - if not "%cxx_path%"=="" (set "PATH=%PATH%;%cxx_path%") build_script: - md _build -Force - cd _build - echo %configuration% - cmake -G "%generator%" "-DCMAKE_BUILD_TYPE=%configuration%" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON .. - cmake --build . --config %configuration% test_script: - ctest --build-config %configuration% --timeout 300 --output-on-failure artifacts: - path: '_build/CMakeFiles/*.log' name: logs - path: '_build/Testing/**/*.xml' name: test_results ================================================ FILE: bazel/benchmark_deps.bzl ================================================ """ This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings). 
def _maybe(repo_rule, name, **kwargs):
    """Declares repository `name` with `repo_rule` unless it is already defined.

    Args:
      repo_rule: the repository rule to invoke (e.g. `http_archive`).
      name: name of the external repository.
      **kwargs: remaining attributes forwarded unchanged to `repo_rule`.
    """
    if name not in native.existing_rules():
        repo_rule(name = name, **kwargs)

def benchmark_deps():
    """Loads dependencies required to build Google Benchmark.

    Each repository is only declared when no rule of the same name exists yet,
    so a repository already defined by the caller's workspace is left alone.
    """
    _maybe(
        http_archive,
        name = "bazel_skylib",
        sha256 = "cd55a062e763b9349921f0f5db8c3933288dc8ba4f76dd9416aac68acee3cb94",
        urls = [
            "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
            "https://github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
        ],
    )

    _maybe(
        http_archive,
        name = "rules_python",
        sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793",
        strip_prefix = "rules_python-0.27.1",
        url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz",
    )

    _maybe(
        new_git_repository,
        name = "com_google_googletest",
        remote = "https://github.com/google/googletest.git",
        tag = "release-1.12.1",
    )

    _maybe(
        new_git_repository,
        name = "nanobind",
        remote = "https://github.com/wjakob/nanobind.git",
        tag = "v1.9.2",
        build_file = "@//bindings/python:nanobind.BUILD",
        recursive_init_submodules = True,
    )

    # libpfm 4.11.0 release tarball. Upstream tags are listed at
    # https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
    _maybe(
        http_archive,
        name = "libpfm",
        build_file = str(Label("//tools:libpfm.BUILD.bazel")),
        sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
        type = "tar.gz",
        strip_prefix = "libpfm-4.11.0",
        urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
    )
"nanobind_extension", "nanobind_stubgen") load("@rules_python//python:defs.bzl", "py_library", "py_test") py_library( name = "google_benchmark", srcs = ["__init__.py"], visibility = ["//visibility:public"], deps = [ ":_benchmark", ], ) nanobind_extension( name = "_benchmark", srcs = ["benchmark.cc"], deps = ["//:benchmark"], ) nanobind_stubgen( name = "benchmark_stubgen", marker_file = "bindings/python/google_benchmark/py.typed", module = ":_benchmark", ) py_test( name = "example", srcs = ["example.py"], python_version = "PY3", srcs_version = "PY3", visibility = ["//visibility:public"], deps = [ ":google_benchmark", ], ) ================================================ FILE: bindings/python/google_benchmark/__init__.py ================================================ # Copyright 2020 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Python benchmarking utilities. Example usage: import google_benchmark as benchmark @benchmark.register def my_benchmark(state): ... # Code executed outside `while` loop is not timed. while state: ... # Code executed within `while` loop is timed. 
class __OptionMaker:
    """Collect benchmark builder options through chained decorators.

    The class holds no state of its own. Any non-dunder attribute access, such
    as ``option.range``, yields a function; calling that function with the
    builder arguments (``option.range(start=0, limit=1 << 5)``) yields a
    decorator that records the call on an ``Options`` object.
    """

    class Options:
        """Plain record of the benchmarked function and its recorded calls."""

        def __init__(self, func):
            # The function that will eventually be registered as a benchmark.
            self.func = func
            # (builder_name, args, kwargs) tuples in the order they were
            # recorded.
            self.builder_calls = []

    @classmethod
    def make(cls, func_or_options):
        """Return an ``Options`` for ``func_or_options``.

        An existing ``Options`` instance is returned unchanged; anything else
        is treated as the benchmarked function and wrapped in a new
        ``Options``.
        """
        if isinstance(func_or_options, cls.Options):
            return func_or_options
        return cls.Options(func_or_options)

    def __getattr__(self, builder_name):
        """Return a recorder for the builder method called ``builder_name``.

        Raises:
            AttributeError: if ``builder_name`` is a dunder name. Those lookups
                are protocol probes (``__wrapped__``, ``__deepcopy__``, ...)
                rather than builder options, and answering them with a
                function would make ``hasattr`` report attributes that do not
                exist.
        """
        if builder_name.startswith("__") and builder_name.endswith("__"):
            raise AttributeError(builder_name)

        # The function that gets returned on
        # @option.range(start=0, limit=1<<5).
        def builder_method(*args, **kwargs):
            # The decorator that gets called, either with the benchmarked
            # function or with the Options produced by a previous option.
            def decorator(func_or_options):
                options = self.make(func_or_options)
                options.builder_calls.append((builder_name, args, kwargs))
                # Options is returned instead of the function, so this is not
                # technically a decorator and needs a final call to @register.
                return options

            return decorator

        return builder_method
def register(undefined=None, *, name=None):
    """Register a function for benchmarking.

    Works both as a bare decorator (``@register``) and as a decorator factory
    (``@register(name=...)``).

    Args:
        undefined: the function to benchmark, or the ``Options`` object
            produced by the ``@option.*`` decorators. ``None`` when called as
            a factory.
        name: benchmark name; defaults to the function's ``__name__``.

    Returns:
        The benchmarked function itself, or a decorator when ``undefined`` is
        ``None``.
    """
    if undefined is None:
        # Called as ``@register(...)``: hand back the decorator that will
        # receive the function (or Options) afterwards.
        def decorator(func_or_options):
            return register(func_or_options, name=name)

        return decorator

    # Either a plain function (simple case) or an Options instance
    # (@option._ case); both end up as Options.
    options = __OptionMaker.make(undefined)
    benchmark_name = options.func.__name__ if name is None else name

    # Register the benchmark, then replay the recorded @option._ calls on the
    # returned builder, walking the recorded list back to front.
    benchmark = _benchmark.RegisterBenchmark(benchmark_name, options.func)
    for builder_name, args, kwargs in reversed(options.builder_calls):
        builder = getattr(benchmark, builder_name)
        builder(*args, **kwargs)

    # The decorator does not modify the benchmarked function.
    return options.func


def main(argv: list[str] | None = None) -> None:
    """Initialize the benchmark library and run the specified benchmarks.

    Args:
        argv: command-line arguments; ``sys.argv`` is used when this is
            ``None`` or empty.
    """
    import sys

    effective_argv = argv if argv else sys.argv
    _benchmark.Initialize(effective_argv)
    return _benchmark.RunSpecifiedBenchmarks()
    # FIXME: can we rerun with disabled ASLR?
#include "benchmark/benchmark.h" #include "nanobind/nanobind.h" #include "nanobind/operators.h" #include "nanobind/stl/bind_map.h" #include "nanobind/stl/string.h" #include "nanobind/stl/vector.h" NB_MAKE_OPAQUE(benchmark::UserCounters); namespace { namespace nb = nanobind; std::vector Initialize(const std::vector& argv) { std::vector ptrs; ptrs.reserve(argv.size()); for (auto& arg : argv) { ptrs.push_back(const_cast(arg.c_str())); } if (!ptrs.empty()) { // The `argv` pointers here become invalid when this function returns, but // benchmark holds the pointer to `argv[0]`. We create a static copy of it // so it persists, and replace the pointer below. static std::string executable_name(argv[0]); ptrs[0] = const_cast(executable_name.c_str()); } int argc = static_cast(argv.size()); benchmark::Initialize(&argc, ptrs.data()); std::vector remaining_argv; remaining_argv.reserve(argc); for (int i = 0; i < argc; ++i) { remaining_argv.emplace_back(ptrs[i]); } return remaining_argv; } benchmark::Benchmark* RegisterBenchmark(const std::string& name, nb::callable f) { return benchmark::RegisterBenchmark( name, [f](benchmark::State& state) { f(&state); }); } NB_MODULE(_benchmark, m) { using benchmark::TimeUnit; nb::enum_(m, "TimeUnit") .value("kNanosecond", TimeUnit::kNanosecond) .value("kMicrosecond", TimeUnit::kMicrosecond) .value("kMillisecond", TimeUnit::kMillisecond) .value("kSecond", TimeUnit::kSecond) .export_values(); using benchmark::BigO; nb::enum_(m, "BigO") .value("oNone", BigO::oNone) .value("o1", BigO::o1) .value("oN", BigO::oN) .value("oNSquared", BigO::oNSquared) .value("oNCubed", BigO::oNCubed) .value("oLogN", BigO::oLogN) .value("oNLogN", BigO::oNLogN) .value("oAuto", BigO::oAuto) .value("oLambda", BigO::oLambda) .export_values(); using benchmark::Benchmark; nb::class_(m, "Benchmark") // For methods returning a pointer to the current object, reference // return policy is used to ask nanobind not to take ownership of the // returned object and avoid calling 
delete on it. // https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies // // For methods taking a const std::vector<...>&, a copy is created // because a it is bound to a Python list. // https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html .def("unit", &Benchmark::Unit, nb::rv_policy::reference) .def("arg", &Benchmark::Arg, nb::rv_policy::reference) .def("args", &Benchmark::Args, nb::rv_policy::reference) .def("range", &Benchmark::Range, nb::rv_policy::reference, nb::arg("start"), nb::arg("limit")) .def("dense_range", &Benchmark::DenseRange, nb::rv_policy::reference, nb::arg("start"), nb::arg("limit"), nb::arg("step") = 1) .def("ranges", &Benchmark::Ranges, nb::rv_policy::reference) .def("args_product", &Benchmark::ArgsProduct, nb::rv_policy::reference) .def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference) .def("arg_names", &Benchmark::ArgNames, nb::rv_policy::reference) .def("range_pair", &Benchmark::RangePair, nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"), nb::arg("lo2"), nb::arg("hi2")) .def("range_multiplier", &Benchmark::RangeMultiplier, nb::rv_policy::reference) .def("min_time", &Benchmark::MinTime, nb::rv_policy::reference) .def("min_warmup_time", &Benchmark::MinWarmUpTime, nb::rv_policy::reference) .def("iterations", &Benchmark::Iterations, nb::rv_policy::reference) .def("repetitions", &Benchmark::Repetitions, nb::rv_policy::reference) .def("report_aggregates_only", &Benchmark::ReportAggregatesOnly, nb::rv_policy::reference, nb::arg("value") = true) .def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly, nb::rv_policy::reference, nb::arg("value") = true) .def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime, nb::rv_policy::reference) .def("use_real_time", &Benchmark::UseRealTime, nb::rv_policy::reference) .def("use_manual_time", &Benchmark::UseManualTime, nb::rv_policy::reference) .def( "complexity", (Benchmark * (Benchmark::*)(benchmark::BigO)) & 
Benchmark::Complexity, nb::rv_policy::reference, nb::arg("complexity") = benchmark::oAuto); using benchmark::Counter; nb::class_ py_counter(m, "Counter"); nb::enum_(py_counter, "Flags", nb::is_arithmetic(), nb::is_flag()) .value("kDefaults", Counter::Flags::kDefaults) .value("kIsRate", Counter::Flags::kIsRate) .value("kAvgThreads", Counter::Flags::kAvgThreads) .value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate) .value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant) .value("kIsIterationInvariantRate", Counter::Flags::kIsIterationInvariantRate) .value("kAvgIterations", Counter::Flags::kAvgIterations) .value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate) .value("kInvert", Counter::Flags::kInvert) .export_values(); nb::enum_(py_counter, "OneK") .value("kIs1000", Counter::OneK::kIs1000) .value("kIs1024", Counter::OneK::kIs1024) .export_values(); py_counter .def(nb::init(), nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults, nb::arg("k") = Counter::kIs1000) .def("__init__", ([](Counter* c, double value) { new (c) Counter(value); })) .def_rw("value", &Counter::value) .def_rw("flags", &Counter::flags) .def_rw("oneK", &Counter::oneK) .def(nb::init_implicit()); nb::implicitly_convertible(); nb::bind_map(m, "UserCounters"); using benchmark::State; nb::class_(m, "State") .def("__bool__", &State::KeepRunning) .def_prop_ro("keep_running", &State::KeepRunning) .def("pause_timing", &State::PauseTiming) .def("resume_timing", &State::ResumeTiming) .def("skip_with_error", &State::SkipWithError) .def_prop_ro("error_occurred", &State::error_occurred) .def("set_iteration_time", &State::SetIterationTime) .def_prop_rw("bytes_processed", &State::bytes_processed, &State::SetBytesProcessed) .def_prop_rw("complexity_n", &State::complexity_length_n, &State::SetComplexityN) .def_prop_rw("items_processed", &State::items_processed, &State::SetItemsProcessed) .def("set_label", &State::SetLabel) .def( "range", [](const State& state, std::size_t pos = 0) 
-> int64_t { if (pos < state.range_size()) { return state.range(pos); } throw nb::index_error("pos is out of range"); }, nb::arg("pos") = 0) .def_prop_ro("iterations", &State::iterations) .def_prop_ro("name", &State::name) .def_rw("counters", &State::counters) .def_prop_ro("thread_index", &State::thread_index) .def_prop_ro("threads", &State::threads); m.def("Initialize", Initialize); m.def("RegisterBenchmark", RegisterBenchmark, nb::rv_policy::reference); m.def("RunSpecifiedBenchmarks", []() { benchmark::RunSpecifiedBenchmarks(); }); m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks); m.def("AddCustomContext", benchmark::AddCustomContext, nb::arg("key"), nb::arg("value"), "Add a key-value pair to output as part of the context stanza in the " "report."); }; } // namespace ================================================ FILE: bindings/python/google_benchmark/example.py ================================================ # Copyright 2020 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Example of Python using C++ benchmark framework. To run this example, you must first install the `google_benchmark` Python package. To install using `setup.py`, download and extract the `google_benchmark` source. 
@benchmark.register
def empty(state):
    """Benchmark an empty loop body."""
    while state:
        pass


@benchmark.register
def sum_million(state):
    """Benchmark summing the integers below one million."""
    while state:
        sum(range(1_000_000))


@benchmark.register
def pause_timing(state):
    """Pause timing every iteration."""
    while state:
        # Construct a list of random ints every iteration without timing it
        state.pause_timing()
        random_list = [random.randint(0, 100) for _ in range(100)]
        state.resume_timing()
        # Time the in place sorting algorithm
        random_list.sort()


@benchmark.register
def skipped(state):
    """Report an error on the state and return without running any loop."""
    if True:  # Test some predicate here.
        state.skip_with_error("some error")
        return  # NOTE: You must explicitly return, or benchmark will continue.

    # Benchmark code would be here.


@benchmark.register
@benchmark.option.use_manual_time()
def manual_timing(state):
    """Report a manually measured duration for every iteration."""
    while state:
        # Manually measure elapsed wall-clock time; perf_counter() includes
        # the time spent sleeping below, so this is not CPU time.
        start = time.perf_counter()  # perf_counter_ns() in Python 3.7+
        # Something to benchmark
        time.sleep(0.01)
        end = time.perf_counter()
        state.set_iteration_time(end - start)


@benchmark.register
def custom_counters(state):
    """Collect custom metric using benchmark.Counter."""
    num_foo = 0.0
    while state:
        # Benchmark some code here
        # Collect some custom metric named foo
        num_foo += 0.13

    # Automatic Counter from numbers.
    state.counters["foo"] = num_foo
    # Set a counter as a rate.
    state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
    # Set a counter as an inverse of rate.
    state.counters["foo_inv_rate"] = Counter(
        num_foo, Counter.kIsRate | Counter.kInvert
    )
    # Set a counter as a thread-average quantity.
    state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
    # There's also a combined flag:
    state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate)


@benchmark.register
@benchmark.option.measure_process_cpu_time()
@benchmark.option.use_real_time()
def with_options(state):
    """Stack several ``@benchmark.option`` decorators under ``@register``."""
    while state:
        sum(range(1_000_000))


@benchmark.register(name="sum_million_microseconds")
@benchmark.option.unit(benchmark.kMicrosecond)
def with_options2(state):
    """Register under an explicit name and set a time unit option."""
    while state:
        sum(range(1_000_000))


@benchmark.register
@benchmark.option.arg(100)
@benchmark.option.arg(1000)
def passing_argument(state):
    """Read the argument supplied through ``option.arg`` via ``state.range(0)``."""
    while state:
        sum(range(state.range(0)))


@benchmark.register
@benchmark.option.range(8, limit=8 << 10)
def using_range(state):
    """Read the argument supplied through ``option.range`` via ``state.range(0)``."""
    while state:
        sum(range(state.range(0)))


@benchmark.register
@benchmark.option.range_multiplier(2)
@benchmark.option.range(1 << 10, 1 << 18)
@benchmark.option.complexity(benchmark.oN)
def computing_complexity(state):
    """Set ``complexity_n`` from the range argument after the timed loop."""
    while state:
        sum(range(state.range(0)))
    state.complexity_n = state.range(0)


if __name__ == "__main__":
    # Record the interpreter version in the context section of the report.
    benchmark.add_custom_context("python", sys.version)
    benchmark.main()
SANITIZED_FLAG ${SANITIZED_FLAG})
  set(${OUTPUT} "${SANITIZED_FLAG}" PARENT_SCOPE)
endfunction(mangle_compiler_flag)

# add_cxx_compiler_flag(<FLAG> [<VARIANT>])
#
# Check whether the C++ compiler accepts FLAG (result stored in the mangled
# HAVE_CXX_FLAG_* variable). If it does, append FLAG, together with the
# current BENCHMARK_CXX_FLAGS[_<VARIANT>], to CMAKE_CXX_FLAGS[_<VARIANT>] in
# the caller's scope. An unsupported flag is silently skipped.
function(add_cxx_compiler_flag FLAG)
  mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
  # Temporarily add FLAG to CMAKE_REQUIRED_FLAGS for the check, then restore.
  set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}")
  check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
  set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
  if(${MANGLED_FLAG})
    # An optional second argument selects a per-configuration variable,
    # e.g. RELEASE -> CMAKE_CXX_FLAGS_RELEASE.
    if(ARGC GREATER 1)
      set(VARIANT ${ARGV1})
      string(TOUPPER "_${VARIANT}" VARIANT)
    else()
      set(VARIANT "")
    endif()
    set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
  endif()
endfunction()

# add_required_cxx_compiler_flag(<FLAG> [<VARIANT>])
#
# Like add_cxx_compiler_flag(), but FLAG is mandatory: configuration aborts
# with FATAL_ERROR when the compiler rejects it. On success FLAG is appended
# to CMAKE_CXX_FLAGS[_<VARIANT>], to the exe/shared/module linker flags and to
# CMAKE_REQUIRED_FLAGS in the caller's scope.
function(add_required_cxx_compiler_flag FLAG)
  mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
  set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}")
  check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
  set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
  if(${MANGLED_FLAG})
    if(ARGC GREATER 1)
      set(VARIANT ${ARGV1})
      string(TOUPPER "_${VARIANT}" VARIANT)
    else()
      set(VARIANT "")
    endif()
    set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
    set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}" PARENT_SCOPE)
  else()
    message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler")
  endif()
endfunction()

# check_cxx_warning_flag(<FLAG>)
#
# Only probe whether the warning flag FLAG is supported; the result is left in
# the mangled HAVE_CXX_FLAG_* variable and no compiler flags are modified.
function(check_cxx_warning_flag FLAG)
  mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
  set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
  # Add -Werror to ensure the compiler generates an error if the warning flag
  # doesn't exist.
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror ${FLAG}")
  check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
  set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
endfunction()

================================================
FILE: cmake/CXXFeatureCheck.cmake
================================================
# - Compile and run code to check for C++ features
#
# This function compiles a source file under the `cmake` folder
# and adds the corresponding `HAVE_[FILENAME]` flag to the CMake
# environment
#
#  cxx_feature_check(<FILE> [<EXTRA_CMAKE_FLAGS>])
#
# - Example
#
# include(CXXFeatureCheck)
# cxx_feature_check(STD_REGEX)
# Requires CMake 2.8.12+

# Include guard: do nothing if this module was already included.
if(__cxx_feature_check)
  return()
endif()
set(__cxx_feature_check INCLUDED)

# option() takes (<variable> "<help_text>" [value]); without a help string the
# literal OFF would be used as the description. The default stays OFF.
option(CXXFEATURECHECK_DEBUG "Print the compiler output when a feature check fails to compile" OFF)

# cxx_feature_check(<FILE> [<EXTRA_CMAKE_FLAGS>])
#
# Build (and, when not cross-compiling, run) cmake/<file>.cpp and cache the
# outcome as HAVE_<FILE>; on success -DHAVE_<FILE> is also added to the
# compile definitions. The optional second argument is appended to the
# CMAKE_FLAGS passed to try_compile()/try_run().
function(cxx_feature_check FILE)
  # FILE (lower case) names the source file, VAR (upper case) the result.
  string(TOLOWER ${FILE} FILE)
  string(TOUPPER ${FILE} VAR)
  string(TOUPPER "HAVE_${VAR}" FEATURE)
  # Reuse an existing result (cached, or preset by the user) without
  # compiling again.
  if (DEFINED HAVE_${VAR})
    if(HAVE_${VAR})
      add_definitions(-DHAVE_${VAR})
    endif()
    return()
  endif()

  set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
  if (ARGC GREATER 1)
    message(STATUS "Enabling additional flags: ${ARGV1}")
    list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
  endif()

  if (NOT DEFINED COMPILE_${FEATURE})
    if(CMAKE_CROSSCOMPILING)
      # The probe cannot be executed on the build host, so only compile it
      # and record a run result of 0 when compilation succeeded.
      message(STATUS "Cross-compiling to test ${FEATURE}")
      try_compile(COMPILE_${FEATURE}
              ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
              CXX_STANDARD 17
              CXX_STANDARD_REQUIRED ON
              CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
              LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
              OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
      if(COMPILE_${FEATURE})
        message(WARNING
              "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
        set(RUN_${FEATURE} 0 CACHE INTERNAL "")
      else()
        set(RUN_${FEATURE} 1 CACHE INTERNAL "")
      endif()
    else()
      message(STATUS "Compiling and running to test ${FEATURE}")
      try_run(RUN_${FEATURE} COMPILE_${FEATURE}
              ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
              CXX_STANDARD 17
              CXX_STANDARD_REQUIRED ON
              CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
              LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
              COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
    endif()
  endif()

  if(COMPILE_${FEATURE})
    # RUN_<FEATURE> holds the probe's exit code; 0 means success.
    if(DEFINED RUN_${FEATURE} AND RUN_${FEATURE} EQUAL 0)
      message(STATUS "Performing Test ${FEATURE} -- success")
      set(HAVE_${VAR} 1 CACHE INTERNAL "")
      add_definitions(-DHAVE_${VAR})
    else()
      message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
      set(HAVE_${VAR} 0 CACHE INTERNAL "")
    endif()
  else()
    if(CXXFEATURECHECK_DEBUG)
      message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
    else()
      message(STATUS "Performing Test ${FEATURE} -- failed to compile")
    endif()
    set(HAVE_${VAR} 0 CACHE INTERNAL "")
  endif()
endfunction()

================================================
FILE: cmake/Config.cmake.in
================================================
@PACKAGE_INIT@

include (CMakeFindDependencyMacro)

find_dependency (Threads)

if (@BENCHMARK_ENABLE_LIBPFM@)
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
  find_dependency (PFM)
endif()

include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")

================================================
FILE: cmake/GetGitVersion.cmake
================================================
# - Returns a version string from Git tags
#
# This function inspects the annotated git tags for the project and returns a string
# into a CMake variable
#
#  get_git_version(<var>)
#
# - Example
#
# include(GetGitVersion)
# get_git_version(GIT_VERSION)
#
# Requires CMake 2.8.11+
find_package(Git)

# Include guard: do nothing if this module was already included.
if(__get_git_version)
  return()
endif()
set(__get_git_version INCLUDED)

# Store the output of `git describe` for PROJECT_SOURCE_DIR in <var>.
# Falls back to "v0.0.0" when git is unavailable or the command fails.
function(get_git_version var)
  if(GIT_EXECUTABLE)
      execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 --dirty
          WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
          RESULT_VARIABLE status
          OUTPUT_VARIABLE GIT_VERSION
          ERROR_QUIET)
      # A non-zero exit status means `git describe` failed.
      if(status)
          set(GIT_VERSION "v0.0.0")
      endif()
  else()
      set(GIT_VERSION "v0.0.0")
  endif()

  set(${var} ${GIT_VERSION} PARENT_SCOPE)
endfunction()

================================================
FILE: cmake/GoogleTest.cmake
================================================
# Download and unpack googletest at configure time
set(GOOGLETEST_PREFIX "${benchmark_BINARY_DIR}/third_party/googletest")
configure_file(${benchmark_SOURCE_DIR}/cmake/GoogleTest.cmake.in ${GOOGLETEST_PREFIX}/CMakeLists.txt @ONLY)

set(GOOGLETEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/googletest" CACHE PATH "") # Mind the quotes
# Configure the helper project generated above with the same generator.
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
  -DALLOW_DOWNLOADING_GOOGLETEST=${BENCHMARK_DOWNLOAD_DEPENDENCIES} -DGOOGLETEST_PATH:PATH=${GOOGLETEST_PATH} .
  RESULT_VARIABLE result
  WORKING_DIRECTORY ${GOOGLETEST_PREFIX}
)

if(result)
  message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()

# Build the helper project.
execute_process(
  COMMAND ${CMAKE_COMMAND} --build .
  RESULT_VARIABLE result
  WORKING_DIRECTORY ${GOOGLETEST_PREFIX}
)

if(result)
  message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()

# Prevent overriding the parent project's compiler/linker
# settings on Windows
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

# Written by the helper project; defines GOOGLETEST_SOURCE_DIR and
# GOOGLETEST_BINARY_DIR.
include(${GOOGLETEST_PREFIX}/googletest-paths.cmake)

# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${GOOGLETEST_SOURCE_DIR}
                 ${GOOGLETEST_BINARY_DIR}
                 EXCLUDE_FROM_ALL)

# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
if (MSVC)
  target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
  target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
  target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
  target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
else()
  target_compile_options(gtest PRIVATE "-w")
  target_compile_options(gtest_main PRIVATE "-w")
  target_compile_options(gmock PRIVATE "-w")
  target_compile_options(gmock_main PRIVATE "-w")
endif()

if(NOT DEFINED GTEST_COMPILE_COMMANDS)
    set(GTEST_COMPILE_COMMANDS ON)
endif()

# Mark each target's own interface include directories as system includes.
# The property value is a $<TARGET_PROPERTY:...> generator expression; a bare
# `$` is not a valid value here.
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})

================================================
FILE: cmake/GoogleTest.cmake.in
================================================
cmake_minimum_required (VERSION 3.13...3.22)

project(googletest-download NONE)

# Enable ExternalProject CMake module
include(ExternalProject)

option(ALLOW_DOWNLOADING_GOOGLETEST "If googletest src tree is not found in location specified by GOOGLETEST_PATH, do fetch the archive from internet" OFF)
set(GOOGLETEST_PATH "/usr/src/googletest" CACHE PATH
                    "Path to the googletest root tree. Should contain googletest and googlemock subdirs.
And CMakeLists.txt in root, and in both of these subdirs") # Download and install GoogleTest message(STATUS "Looking for Google Test sources") message(STATUS "Looking for Google Test sources in ${GOOGLETEST_PATH}") if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" AND EXISTS "${GOOGLETEST_PATH}/CMakeLists.txt" AND EXISTS "${GOOGLETEST_PATH}/googletest" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googletest" AND EXISTS "${GOOGLETEST_PATH}/googletest/CMakeLists.txt" AND EXISTS "${GOOGLETEST_PATH}/googlemock" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googlemock" AND EXISTS "${GOOGLETEST_PATH}/googlemock/CMakeLists.txt") message(STATUS "Found Google Test in ${GOOGLETEST_PATH}") ExternalProject_Add( googletest PREFIX "${CMAKE_BINARY_DIR}" DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" SOURCE_DIR "${GOOGLETEST_PATH}" # use existing src dir. BINARY_DIR "${CMAKE_BINARY_DIR}/build" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" ) else() if(NOT ALLOW_DOWNLOADING_GOOGLETEST) message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") return() else() message(STATUS "Did not find Google Test sources! 
Fetching from web...") ExternalProject_Add( googletest GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG "v1.15.2" GIT_SHALLOW "ON" PREFIX "${CMAKE_BINARY_DIR}" STAMP_DIR "${CMAKE_BINARY_DIR}/stamp" DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" SOURCE_DIR "${CMAKE_BINARY_DIR}/src" BINARY_DIR "${CMAKE_BINARY_DIR}/build" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" ) endif() endif() ExternalProject_Get_Property(googletest SOURCE_DIR BINARY_DIR) file(WRITE googletest-paths.cmake "set(GOOGLETEST_SOURCE_DIR \"${SOURCE_DIR}\") set(GOOGLETEST_BINARY_DIR \"${BINARY_DIR}\") ") ================================================ FILE: cmake/benchmark.pc.in ================================================ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} libdir=@CMAKE_INSTALL_FULL_LIBDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: @PROJECT_NAME@ Description: Google microbenchmark framework Version: @NORMALIZED_VERSION@ Libs: -L${libdir} -lbenchmark Libs.private: -lpthread @BENCHMARK_PRIVATE_LINK_LIBRARIES@ Cflags: -I${includedir} ================================================ FILE: cmake/benchmark_main.pc.in ================================================ libdir=@CMAKE_INSTALL_FULL_LIBDIR@ Name: @PROJECT_NAME@ Description: Google microbenchmark framework (with main() function) Version: @NORMALIZED_VERSION@ Requires: benchmark Libs: -L${libdir} -lbenchmark_main ================================================ FILE: cmake/gnu_posix_regex.cpp ================================================ #include #include int main() { std::string str = "test0159"; regex_t re; int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); if (ec != 0) { return ec; } return regexec(&re, str.c_str(), 0, nullptr, 0) ? 
-1 : 0; } ================================================ FILE: cmake/llvm-toolchain.cmake ================================================ find_package(LLVMAr REQUIRED) set(CMAKE_AR "${LLVMAR_EXECUTABLE}" CACHE FILEPATH "" FORCE) find_package(LLVMNm REQUIRED) set(CMAKE_NM "${LLVMNM_EXECUTABLE}" CACHE FILEPATH "" FORCE) find_package(LLVMRanLib REQUIRED) set(CMAKE_RANLIB "${LLVMRANLIB_EXECUTABLE}" CACHE FILEPATH "" FORCE) ================================================ FILE: cmake/posix_regex.cpp ================================================ #include #include int main() { std::string str = "test0159"; regex_t re; int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); if (ec != 0) { return ec; } int ret = regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; regfree(&re); return ret; } ================================================ FILE: cmake/pthread_affinity.cpp ================================================ #include #ifdef __FreeBSD__ #include #endif int main() { cpu_set_t set; CPU_ZERO(&set); for (int i = 0; i < CPU_SETSIZE; ++i) { CPU_SET(i, &set); CPU_CLR(i, &set); } pthread_t self = pthread_self(); int ret; ret = pthread_getaffinity_np(self, sizeof(set), &set); if (ret != 0) return ret; ret = pthread_setaffinity_np(self, sizeof(set), &set); if (ret != 0) return ret; return 0; } ================================================ FILE: cmake/split_list.cmake ================================================ macro(split_list listname) string(REPLACE ";" " " ${listname} "${${listname}}") endmacro() ================================================ FILE: cmake/std_regex.cpp ================================================ #include #include int main() { const std::string str = "test0159"; std::regex re; re = std::regex("^[a-z]+[0-9]+$", std::regex_constants::extended | std::regex_constants::nosubs); return std::regex_search(str, re) ? 
0 : -1; } ================================================ FILE: cmake/steady_clock.cpp ================================================ #include <chrono> int main() { typedef std::chrono::steady_clock Clock; Clock::time_point tp = Clock::now(); ((void)tp); } ================================================ FILE: cmake/thread_safety_attributes.cpp ================================================ #define HAVE_THREAD_SAFETY_ATTRIBUTES #include "../src/mutex.h" int main() {} ================================================ FILE: docs/AssemblyTests.md ================================================ # Assembly Tests The Benchmark library provides a number of functions whose primary purpose is to affect assembly generation, including `DoNotOptimize` and `ClobberMemory`. In addition there are other functions, such as `KeepRunning`, for which generating good assembly is paramount. For these functions it's important to have tests that verify the correctness and quality of the implementation. This requires testing the code generated by the compiler. This document describes how the Benchmark library tests compiler output, as well as how to properly write new tests. ## Anatomy of a Test Writing a test has two steps: * Write the code you want to generate assembly for. * Add `// CHECK` lines to match against the verified assembly. Example: ```c++ // CHECK-LABEL: test_add: extern "C" int test_add() { extern int ExternInt; return ExternInt + 1; // CHECK: movl ExternInt(%rip), %eax // CHECK: addl %eax // CHECK: ret } ``` #### LLVM Filecheck [LLVM's Filecheck](https://llvm.org/docs/CommandGuide/FileCheck.html) is used to test the generated assembly against the `// CHECK` lines specified in the tests source file. Please see the documentation linked above for information on how to write `CHECK` directives. #### Tips and Tricks: * Tests should match the minimal amount of output required to establish correctness.
`CHECK` directives don't have to match on the exact next line after the previous match, so tests should omit checks for unimportant bits of assembly. ([`CHECK-NEXT`](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive) can be used to ensure a match occurs exactly after the previous match). * The tests are compiled with `-O3 -g0`. So we're only testing the optimized output. * The assembly output is further cleaned up using `tools/strip_asm.py`. This removes comments, assembler directives, and unused labels before the test is run. * The generated and stripped assembly file for a test is output under `<build-directory>/test/<test-name>.s` * Filecheck supports using [`CHECK` prefixes](https://llvm.org/docs/CommandGuide/FileCheck.html#cmdoption-check-prefixes) to specify lines that should only match in certain situations. The Benchmark tests use `CHECK-CLANG` and `CHECK-GNU` for lines that are only expected to match Clang or GCC's output respectively. Normal `CHECK` lines match against all compilers. (Note: `CHECK-NOT` and `CHECK-LABEL` are NOT prefixes. They are versions of non-prefixed `CHECK` lines) * Use `extern "C"` to disable name mangling for specific functions. This makes them easier to name in the `CHECK` lines. ## Problems Writing Portable Tests Tests which check the code generated by a compiler are inherently non-portable. Different compilers and even different compiler versions may generate entirely different code. The Benchmark tests must tolerate this. LLVM Filecheck provides a number of mechanisms to help write "more portable" tests, including [matching using regular expressions](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-pattern-matching-syntax), allowing the creation of [named variables](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-variables) for later matching, and [checking non-sequential matches](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-dag-directive).
#### Capturing Variables For example, say GCC stores a variable in a register but Clang stores it in memory. To write a test that tolerates both cases we "capture" the destination of the store, and then use the captured expression to write the remainder of the test. ```c++ // CHECK-LABEL: test_div_no_op_into_shr: extern "C" int test_div_no_op_into_shr(int value) { int divisor = 2; benchmark::DoNotOptimize(divisor); // hide the value from the optimizer return value / divisor; // CHECK: movl $2, [[DEST:.*]] // CHECK: idivl [[DEST]] // CHECK: ret } ``` #### Using Regular Expressions to Match Differing Output Often tests require testing assembly lines which may subtly differ between compilers or compiler versions. A common example of this is matching stack frame addresses. In this case regular expressions can be used to match the differing bits of output. For example: ```c++ int ExternInt; struct Point { int x, y, z; }; // CHECK-LABEL: test_store_point: extern "C" void test_store_point() { Point p{ExternInt, ExternInt, ExternInt}; benchmark::DoNotOptimize(p); // CHECK: movl ExternInt(%rip), %eax // CHECK: movl %eax, -{{[0-9]+}}(%rsp) // CHECK: movl %eax, -{{[0-9]+}}(%rsp) // CHECK: movl %eax, -{{[0-9]+}}(%rsp) // CHECK: ret } ``` ## Current Requirements and Limitations The tests require Filecheck to be installed on the `PATH` of the build machine. Otherwise the tests will be disabled. Additionally, as mentioned in the previous section, codegen tests are inherently non-portable. Currently the tests are limited to: * x86_64 targets. * Compiled with GCC or Clang Further work could be done, at least on a limited basis, to extend the tests to other architectures and compilers (using `CHECK` prefixes). Furthermore, the tests fail for builds which specify additional flags that modify code generation, including `--coverage` or `-fsanitize=`.
================================================ FILE: docs/_config.yml ================================================ theme: jekyll-theme-minimal logo: /assets/images/icon_black.png show_downloads: true ================================================ FILE: docs/dependencies.md ================================================ # Build tool dependency policy We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems). ## CMake The current supported version is CMake 3.10 as of 2023-08-10. Most modern distributions include newer versions, for example: * Ubuntu 20.04 provides CMake 3.16.3 * Debian 11.4 provides CMake 3.18.4 * Ubuntu 22.04 provides CMake 3.22.1 ## Python The Python bindings require Python 3.10+ as of v1.9.0 (2024-08-16) for installation from PyPI. Building from source for older versions probably still works, though. See the [user guide](python_bindings.md) for details on how to build from source. The minimum theoretically supported version is Python 3.8, since the used bindings generator (nanobind) only supports Python 3.8+. 
================================================ FILE: docs/index.md ================================================ # Benchmark * [Assembly Tests](AssemblyTests.md) * [Dependencies](dependencies.md) * [Perf Counters](perf_counters.md) * [Platform Specific Build Instructions](platform_specific_build_instructions.md) * [Python Bindings](python_bindings.md) * [Random Interleaving](random_interleaving.md) * [Reducing Variance](reducing_variance.md) * [Releasing](releasing.md) * [Tools](tools.md) * [User Guide](user_guide.md) ================================================ FILE: docs/perf_counters.md ================================================ # User-Requested Performance Counters When running benchmarks, the user may choose to request collection of performance counters. This may be useful in investigation scenarios - narrowing down the cause of a regression; or verifying that the underlying cause of a performance improvement matches expectations. This feature is available if: * The benchmark is run on an architecture featuring a Performance Monitoring Unit (PMU), * The benchmark is compiled with support for collecting counters. Currently, this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a dependency via Bazel. The feature does not require modifying benchmark code. Counter collection is handled at the boundaries where timer collection is also handled. To opt-in: * If using a Bazel build, add `--define pfm=1` to your build flags * If using CMake: * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. * Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`. To use, pass a comma-separated list of counter names through the `--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are mapped by libpfm to platform-specifics - see libpfm [documentation](http://perfmon2.sourceforge.net/docs.html) for more details. 
The counter values are reported back through the [User Counters](../README.md#custom-counters) mechanism, meaning, they are available in all the formats (e.g. JSON) supported by User Counters. ================================================ FILE: docs/platform_specific_build_instructions.md ================================================ # Platform Specific Build Instructions ## Building with GCC When the library is built using GCC it is necessary to link with the pthread library due to how GCC implements `std::thread`. Failing to link to pthread will lead to runtime exceptions (unless you're using libc++), not linker errors. See [issue #67](https://github.com/google/benchmark/issues/67) for more details. You can link to pthread by adding `-pthread` to your linker command. Note, you can also use `-lpthread`, but there are potential issues with ordering of command line parameters if you use that. On QNX, the pthread library is part of libc and usually included automatically (see [`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)). There's no separate pthread library to link. ## Building with Visual Studio 2015, 2017 or 2022 The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: ``` // Alternatively, can add libraries using linker options. // First, Add the path to the generated library files (directory containing the `benchmark.lib`) in `[Configuration Properties > Linker > General > Additional Library Directories]`. 
Then do the following: #ifdef _WIN32 #pragma comment ( lib, "Shlwapi.lib" ) #ifdef _DEBUG #pragma comment ( lib, "benchmark.lib" ) #else #pragma comment ( lib, "benchmark.lib" ) #endif #endif ``` When using the static library, make sure to add `BENCHMARK_STATIC_DEFINE` under `[Configuration Properties > C/C++ > Preprocessor > Preprocessor Definitions]` Can also use the graphical version of CMake: * Open `CMake GUI`. * Under `Where to build the binaries`, same path as source plus `build`. * Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. * Click `Configure`, `Generate`, `Open Project`. * If build fails, try deleting entire directory and starting again, or unticking options to build less. ## Building with Intel 2015 Update 1 or Intel System Studio Update 4 See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. ## Building on Solaris If you're running benchmarks on solaris, you'll want the kstat library linked in too (`-lkstat`). ================================================ FILE: docs/python_bindings.md ================================================ # Building and installing Python bindings Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and using Google Benchmark directly in Python. Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows. Supported Python versions are Python 3.8 - 3.12. To install Google Benchmark's Python bindings, run: ```bash python -m pip install --upgrade pip # for manylinux2014 support python -m pip install google-benchmark ``` In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html) on how to create virtual environments. 
To build a wheel directly from source, you can follow these steps: ```bash git clone https://github.com/google/benchmark.git cd benchmark # create a virtual environment and activate it python3 -m venv venv --system-site-packages source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows # upgrade Python's system-wide packages python -m pip install --upgrade pip build # builds the wheel and stores it in the directory "dist". python -m build ``` NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel, refer to the [Bazel installation docs](https://bazel.build/install). ================================================ FILE: docs/random_interleaving.md ================================================ # Random Interleaving [Random Interleaving](https://github.com/google/benchmark/issues/1051) is a technique to lower run-to-run variance. It randomly interleaves repetitions of a microbenchmark with repetitions from other microbenchmarks in the same benchmark test. Data shows it is able to lower run-to-run variance by [40%](https://github.com/google/benchmark/issues/1051) on average. To use, you mainly need to set `--benchmark_enable_random_interleaving=true`, and optionally specify non-zero repetition count `--benchmark_repetitions=9` and optionally decrease the per-repetition time `--benchmark_min_time=0.1`. ================================================ FILE: docs/reducing_variance.md ================================================ # Reducing Variance ## Disabling CPU Frequency Scaling If you see this warning: ``` ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ``` you might want to disable the CPU frequency scaling while running the benchmark, as well as consider other ways to stabilize the performance of your system while benchmarking. Exactly how to do this depends on the Linux distribution, desktop environment, and installed programs.
Specific details are a moving target, so we will not attempt to exhaustively document them here. One simple option is to use the `cpupower` program to change the performance governor to "performance". This tool is maintained along with the Linux kernel and provided by your distribution. It must be run as root, like this: ```bash sudo cpupower frequency-set --governor performance ``` After this you can verify that all CPUs are using the performance governor by running this command: ```bash cpupower frequency-info -o proc ``` The benchmarks you subsequently run will have less variance. ## Disabling ASLR If you see this warning: ``` ***WARNING*** ASLR is enabled, the results may have unreproducible noise in them. ``` you might want to disable the ASLR security hardening feature while running the benchmark. The simplest way is to add ``` benchmark::MaybeReenterWithoutASLR(argc, argv); ``` as the first line of your `main()` function. It will try to disable ASLR for the current process, and, if successful, re-execute the binary. Note that `personality(2)` may be forbidden by e.g. seccomp (which happens by default if you are running in a Docker container). Note that if you link to `benchmark_main`, this is already done for you. To globally disable ASLR on Linux, run ``` echo 0 > /proc/sys/kernel/randomize_va_space ``` To run a single benchmark with ASLR disabled on Linux, do: ``` setarch `uname -m` -R ./a_benchmark ``` For information on how to disable ASLR on other operating systems, please refer to their documentation. ## Reducing Variance in Benchmarks The Linux CPU frequency governor [discussed above](#disabling-cpu-frequency-scaling) is not the only source of noise in benchmarks. Some, but not all, of the sources of variance include: 1. On multi-core machines not all CPUs/CPU cores/CPU threads run at the same speed, so running a benchmark one time and then again may give a different result depending on which CPU it ran on. 2.
CPU scaling features that run on the CPU, like Intel's Turbo Boost and AMD Turbo Core and Precision Boost, can temporarily change the CPU frequency even when using the "performance" governor on Linux. 3. Context switching between CPUs, or scheduling competition on the CPU the benchmark is running on. 4. Intel Hyperthreading or AMD SMT causing the same issue as above. 5. Cache effects caused by code running on other CPUs. 6. Non-uniform memory architectures (NUMA). These can cause variance in benchmark results within a single run (`--benchmark_repetitions=N`) or across multiple runs of the benchmark program. Reducing sources of variance is OS and architecture dependent, which is one reason some companies maintain machines dedicated to performance testing. Some of the easier and more effective ways of reducing variance on a typical Linux workstation are: 1. Use the performance governor as [discussed above](#disabling-cpu-frequency-scaling). 2. Disable processor boosting by: ```sh echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost ``` See the Linux kernel's [boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt) for more information. 3. Set the benchmark program's task affinity to a fixed CPU. For example: ```sh taskset -c 0 ./mybenchmark ``` 4. Increase the program's scheduling priority to minimize context switches using `nice` or `chrt`: ```sh sudo nice -n -20 ./mybenchmark sudo chrt -f 80 ./mybenchmark ``` 5. Disable Hyperthreading/SMT. This can be done in the BIOS or using the `/sys` file system (see the LLVM project's [Benchmarking tips](https://llvm.org/docs/Benchmarking.html)). 6. Close other programs that do non-trivial things based on timers, such as your web browser, desktop environment, etc. 7. Reduce the working set of your benchmark to fit within the L1 cache, but do be aware that this may lead you to optimize for an unrealistic situation. Further resources on this topic: 1.
The LLVM project's [Benchmarking tips](https://llvm.org/docs/Benchmarking.html). 1. The Arch Wiki [CPU frequency scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page. ================================================ FILE: docs/releasing.md ================================================ # How to release * Make sure you're on main and synced to HEAD * Ensure the project builds and tests run * `parallel -j0 exec ::: test/*_test` can help ensure everything at least passes * Prepare release notes * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of commits between the last annotated tag and HEAD * Pick the most interesting. * Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`, and `bindings/python/google_benchmark/__init__.py` to the release version you're creating. (This version will be used if benchmark is installed from the archive you'll be creating in the next step.) ``` # CMakeLists.txt project (benchmark VERSION 1.9.0 LANGUAGES CXX) ``` ``` # MODULE.bazel module(name = "com_github_google_benchmark", version="1.9.0") ``` ``` # google_benchmark/__init__.py __version__ = "1.9.0" ``` * Create a release through GitHub's interface * Note this will create a lightweight tag. * Update this to an annotated tag: * `git pull --tags` * `git tag -a -f <tag> <tag>` * `git push --force --tags origin` * Confirm that the "Build and upload Python wheels" action runs to completion * Run it manually if it hasn't run. ================================================ FILE: docs/tools.md ================================================ # Benchmark Tools ## compare.py The `compare.py` utility can be used to compare the results of benchmarks.
### Dependencies The utility relies on the [scipy](https://www.scipy.org) package which can be installed using pip: ```bash pip3 install -r requirements.txt ``` ### Displaying aggregates only The switch `-a` / `--display_aggregates_only` can be used to control the display of the normal iterations vs the aggregates. When passed, it will be passed through to the benchmark binaries to be run, and will be accounted for in the tool itself; only the aggregates will be displayed, but not normal runs. It only affects the display; the separate runs will still be used to calculate the U test. ### Modes of operation There are three modes of operation: 1. Just compare two benchmarks The program is invoked like: ``` bash $ compare.py benchmarks <benchmark_baseline> <benchmark_contender> [benchmark options]... ``` Where `<benchmark_baseline>` and `<benchmark_contender>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. `[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes.
Example output: ``` $ ./compare.py benchmarks ./a.out ./a.out RUNNING: ./a.out --benchmark_out=/tmp/tmprBT5nW Run on (8 X 4000 MHz CPU s) 2017-11-07 21:16:44 ------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------ BM_memcpy/8 36 ns 36 ns 19101577 211.669MB/s BM_memcpy/64 76 ns 76 ns 9412571 800.199MB/s BM_memcpy/512 84 ns 84 ns 8249070 5.64771GB/s BM_memcpy/1024 116 ns 116 ns 6181763 8.19505GB/s BM_memcpy/8192 643 ns 643 ns 1062855 11.8636GB/s BM_copy/8 222 ns 222 ns 3137987 34.3772MB/s BM_copy/64 1608 ns 1608 ns 432758 37.9501MB/s BM_copy/512 12589 ns 12589 ns 54806 38.7867MB/s BM_copy/1024 25169 ns 25169 ns 27713 38.8003MB/s BM_copy/8192 201165 ns 201112 ns 3486 38.8466MB/s RUNNING: ./a.out --benchmark_out=/tmp/tmpt1wwG_ Run on (8 X 4000 MHz CPU s) 2017-11-07 21:16:53 ------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------ BM_memcpy/8 36 ns 36 ns 19397903 211.255MB/s BM_memcpy/64 73 ns 73 ns 9691174 839.635MB/s BM_memcpy/512 85 ns 85 ns 8312329 5.60101GB/s BM_memcpy/1024 118 ns 118 ns 6438774 8.11608GB/s BM_memcpy/8192 656 ns 656 ns 1068644 11.6277GB/s BM_copy/8 223 ns 223 ns 3146977 34.2338MB/s BM_copy/64 1611 ns 1611 ns 435340 37.8751MB/s BM_copy/512 12622 ns 12622 ns 54818 38.6844MB/s BM_copy/1024 25257 ns 25239 ns 27779 38.6927MB/s BM_copy/8192 205013 ns 205010 ns 3479 38.108MB/s Comparing ./a.out to ./a.out Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------ BM_memcpy/8 +0.0020 +0.0020 36 36 36 36 BM_memcpy/64 -0.0468 -0.0470 76 73 76 73 BM_memcpy/512 +0.0081 +0.0083 84 85 84 85 BM_memcpy/1024 +0.0098 +0.0097 116 118 116 118 BM_memcpy/8192 +0.0200 +0.0203 643 656 643 656 BM_copy/8 +0.0046 +0.0042 222 223 222 223 BM_copy/64 +0.0020 +0.0020 1608 1611 1608 1611 BM_copy/512 +0.0027 +0.0026 12589 
12622 12589 12622 BM_copy/1024 +0.0035 +0.0028 25169 25257 25169 25239 BM_copy/8192 +0.0191 +0.0194 201165 205013 201112 205010 ``` For every benchmark from the first run, it looks for the benchmark with exactly the same name in the second run, and then compares the results. If the names differ, the benchmark is omitted from the diff. As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. 2. Compare two different filters of one benchmark The program is invoked like: ``` bash $ compare.py filters <benchmark> <filter_baseline> <filter_contender> [benchmark options]... ``` Where `<benchmark>` either specifies a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. Where `<filter_baseline>` and `<filter_contender>` are the same regex filters that you would pass to the `[--benchmark_filter=<regex>]` parameter of the benchmark binary. `[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes.
Example output: ``` $ ./compare.py filters ./a.out BM_memcpy BM_copy RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmpBWKk0k Run on (8 X 4000 MHz CPU s) 2017-11-07 21:37:28 ------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------ BM_memcpy/8 36 ns 36 ns 17891491 211.215MB/s BM_memcpy/64 74 ns 74 ns 9400999 825.646MB/s BM_memcpy/512 87 ns 87 ns 8027453 5.46126GB/s BM_memcpy/1024 111 ns 111 ns 6116853 8.5648GB/s BM_memcpy/8192 657 ns 656 ns 1064679 11.6247GB/s RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpAvWcOM Run on (8 X 4000 MHz CPU s) 2017-11-07 21:37:33 ---------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------- BM_copy/8 227 ns 227 ns 3038700 33.6264MB/s BM_copy/64 1640 ns 1640 ns 426893 37.2154MB/s BM_copy/512 12804 ns 12801 ns 55417 38.1444MB/s BM_copy/1024 25409 ns 25407 ns 27516 38.4365MB/s BM_copy/8192 202986 ns 202990 ns 3454 38.4871MB/s Comparing BM_memcpy to BM_copy (from ./a.out) Benchmark Time CPU Time Old Time New CPU Old CPU New -------------------------------------------------------------------------------------------------------------------- [BM_memcpy vs. BM_copy]/8 +5.2829 +5.2812 36 227 36 227 [BM_memcpy vs. BM_copy]/64 +21.1719 +21.1856 74 1640 74 1640 [BM_memcpy vs. BM_copy]/512 +145.6487 +145.6097 87 12804 87 12801 [BM_memcpy vs. BM_copy]/1024 +227.1860 +227.1776 111 25409 111 25407 [BM_memcpy vs. BM_copy]/8192 +308.1664 +308.2898 657 202986 656 202990 ``` As you can see, it applies filter to the benchmarks, both when running the benchmark, and before doing the diff. And to make the diff work, the matches are replaced with some common string. Thus, you can compare two different benchmark families within one benchmark binary. As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. 3. 
Compare filter one from benchmark one to filter two from benchmark two: The program is invoked like: ``` bash $ compare.py benchmarksfiltered <benchmark_baseline> <filter_baseline> <benchmark_contender> <filter_contender> [benchmark options]... ``` Where `<benchmark_baseline>` and `<benchmark_contender>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. Where `<filter_baseline>` and `<filter_contender>` are the same regex filters that you would pass to the `[--benchmark_filter=<regex>]` parameter of the benchmark binary. `[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. Example output: ``` $ ./compare.py benchmarksfiltered ./a.out BM_memcpy ./a.out BM_copy RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmp_FvbYg Run on (8 X 4000 MHz CPU s) 2017-11-07 21:38:27 ------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------ BM_memcpy/8 37 ns 37 ns 18953482 204.118MB/s BM_memcpy/64 74 ns 74 ns 9206578 828.245MB/s BM_memcpy/512 91 ns 91 ns 8086195 5.25476GB/s BM_memcpy/1024 120 ns 120 ns 5804513 7.95662GB/s BM_memcpy/8192 664 ns 664 ns 1028363 11.4948GB/s RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpDfL5iE Run on (8 X 4000 MHz CPU s) 2017-11-07 21:38:32 ---------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------- BM_copy/8 230 ns 230 ns 2985909 33.1161MB/s BM_copy/64 1654 ns 1653 ns 419408 36.9137MB/s BM_copy/512 13122 ns 13120 ns 53403 37.2156MB/s BM_copy/1024 26679 ns 26666 ns 26575 36.6218MB/s BM_copy/8192 215068 ns 215053 ns 3221 36.3283MB/s Comparing BM_memcpy (from ./a.out) to BM_copy (from ./a.out) Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------- [BM_memcpy vs. BM_copy]/8 +5.1649 +5.1637 37 230 37 230 [BM_memcpy vs. BM_copy]/64 +21.4352 +21.4374 74 1654 74 1653 [BM_memcpy vs. BM_copy]/512 +143.6022 +143.5865 91 13122 91 13120 [BM_memcpy vs. BM_copy]/1024 +221.5903 +221.4790 120 26679 120 26666 [BM_memcpy vs. BM_copy]/8192 +322.9059 +323.0096 664 215068 664 215053 ``` This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one. As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. ### Note: Interpreting the output Performance measurements are an art, and performance comparisons are doubly so. Results are often noisy and don't necessarily have large absolute differences to them, so just by visual inspection, it is not at all apparent if two measurements are actually showing a performance change or not. It is even more confusing with multiple benchmark repetitions. Thankfully, what we can do, is use statistical tests on the results to determine whether the performance has statistically-significantly changed. `compare.py` uses [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null hypothesis being that there's no difference in performance. 
**The below output is a summary of a benchmark comparison with statistics provided for a multi-threaded process.** ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------- benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27 benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77 benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77 benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0 benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0 OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0 ``` -------------------------------------------- Here's a breakdown of each row: **benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for the statistical test comparing the performance of the process running with one thread. A value of 0.0000 suggests a statistically significant difference in performance. The comparison was conducted using the U Test (Mann-Whitney U Test) with 27 repetitions for each case. **benchmark/threads:1/process_time/real_time_mean**: This shows the relative difference in mean execution time between two different cases. The negative value (-0.1442) implies that the new process is faster by about 14.42%. The old time was 90 units, while the new time is 77 units. **benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the relative difference in the median execution time. Again, the new process is faster by 14.44%. **benchmark/threads:1/process_time/real_time_stddev**: This is the relative difference in the standard deviation of the execution time, which is a measure of how much variation or dispersion there is from the mean. A positive value (+0.3974) implies there is more variance in the execution time in the new process. 
**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of Variation. It is the ratio of the standard deviation to the mean. It provides a standardized measure of dispersion. An increase (+0.6329) indicates more relative variability in the new process. **OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is less influenced by outliers. The negative value indicates a general improvement in the new process. However, given the values are all zero for the old and new times, this seems to be a mistake or placeholder in the output. ----------------------------------------- Let's first try to see what the different columns represent in the above `compare.py` benchmarking output: 1. **Benchmark:** The name of the function being benchmarked, along with the size of the input (after the slash). 2. **Time:** The average time per operation, across all iterations. 3. **CPU:** The average CPU time per operation, across all iterations. 4. **Iterations:** The number of iterations the benchmark was run to get a stable estimate. 5. **Time Old and Time New:** These represent the average time it takes for a function to run in two different scenarios or versions. For example, you might be comparing how fast a function runs before and after you make some changes to it. 6. **CPU Old and CPU New:** These show the average amount of CPU time that the function uses in two different scenarios or versions. This is similar to Time Old and Time New, but focuses on CPU usage instead of overall time. In the comparison section, the relative differences in both time and CPU time are displayed for each input size. A statistically-significant difference is determined by a **p-value**, which is a measure of the probability that the observed difference could have occurred just by random chance. A smaller p-value indicates stronger evidence against the null hypothesis. **Therefore:** 1. 
If the p-value is less than the chosen significance level (alpha), we reject the null hypothesis and conclude the benchmarks are significantly different. 2. If the p-value is greater than or equal to alpha, we fail to reject the null hypothesis and treat the two benchmarks as similar. The result of said statistical test is additionally communicated through color coding: ```diff + Green: ``` The benchmarks are _**statistically different**_. This could mean the performance has either **significantly improved** or **significantly deteriorated**. You should look at the actual performance numbers to see which is the case. ```diff - Red: ``` The benchmarks are _**statistically similar**_. This means the performance **hasn't significantly changed**. In statistical terms, **'green'** means we reject the null hypothesis that there's no difference in performance, and **'red'** means we fail to reject the null hypothesis. This might seem counter-intuitive if you're expecting 'green' to mean 'improved performance' and 'red' to mean 'worsened performance'. ```bash But remember, in this context: 'Success' means 'successfully finding a difference'. 'Failure' means 'failing to find a difference'. ``` Also, please note that **even if** we determine that there **is** a statistically-significant difference between the two measurements, it does not _necessarily_ mean that the actual benchmarks that were measured **are** different; and vice versa, even if we determine that there is **no** statistically-significant difference between the two measurements, it does not necessarily mean that the actual benchmarks that were measured **are not** different. ### U test If there is a sufficient repetition count of the benchmarks, the tool can do a [U Test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) of the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.
If the calculated p-value is lower than the significance level alpha, then the result is said to be statistically significant and the null hypothesis is rejected. In other words, the two benchmarks aren't identical. **WARNING**: requires **LARGE** (no less than 9) number of repetitions to be meaningful! ================================================ FILE: docs/user_guide.md ================================================ # User Guide ## Command Line [Output Formats](#output-formats) [Output Files](#output-files) [Running Benchmarks](#running-benchmarks) [Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) [Result Comparison](#result-comparison) [Extra Context](#extra-context) ## Library [Runtime and Reporting Considerations](#runtime-and-reporting-considerations) [Setup/Teardown](#setupteardown) [Passing Arguments](#passing-arguments) [Custom Benchmark Name](#custom-benchmark-name) [Calculating Asymptotic Complexity](#asymptotic-complexity) [Templated Benchmarks](#templated-benchmarks) [Templated Benchmarks that take arguments](#templated-benchmarks-with-arguments) [Fixtures](#fixtures) [Custom Counters](#custom-counters) [Multithreaded Benchmarks](#multithreaded-benchmarks) [CPU Timers](#cpu-timers) [Manual Timing](#manual-timing) [Setting the Time Unit](#setting-the-time-unit) [Random Interleaving](random_interleaving.md) [User-Requested Performance Counters](perf_counters.md) [Preventing Optimization](#preventing-optimization) [Reporting Statistics](#reporting-statistics) [Custom Statistics](#custom-statistics) [Memory Usage](#memory-usage) [Using RegisterBenchmark](#using-register-benchmark) [Exiting with an Error](#exiting-with-an-error) [A Faster `KeepRunning` Loop](#a-faster-keep-running-loop) ## Benchmarking Tips [Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) [Reducing Variance in Benchmarks](reducing_variance.md) ## Output Formats The library supports multiple output formats.
Use the `--benchmark_format=<console|json|csv>` flag (or set the `BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set the format type. `console` is the default format. The Console format is intended to be a human readable format. By default the format generates color output. Context is output on stderr and the tabular data on stdout. Example tabular output looks like: ``` Benchmark Time(ns) CPU(ns) Iterations ---------------------------------------------------------------------- BM_SetInsert/1024/1 28928 29349 23853 133.097kiB/s 33.2742k items/s BM_SetInsert/1024/8 32065 32913 21375 949.487kiB/s 237.372k items/s BM_SetInsert/1024/10 33157 33648 21431 1.13369MiB/s 290.225k items/s ``` The JSON format outputs human readable json split into two top level attributes. The `context` attribute contains information about the run in general, including information about the CPU and the date. The `benchmarks` attribute contains a list of every benchmark run. Example json output looks like: ```json { "context": { "date": "2015/03/17-18:40:25", "num_cpus": 40, "mhz_per_cpu": 2801, "cpu_scaling_enabled": false, "build_type": "debug" }, "benchmarks": [ { "name": "BM_SetInsert/1024/1", "iterations": 94877, "real_time": 29275, "cpu_time": 29836, "bytes_per_second": 134066, "items_per_second": 33516 }, { "name": "BM_SetInsert/1024/8", "iterations": 21609, "real_time": 32317, "cpu_time": 32429, "bytes_per_second": 986770, "items_per_second": 246693 }, { "name": "BM_SetInsert/1024/10", "iterations": 21393, "real_time": 32724, "cpu_time": 33355, "bytes_per_second": 1199226, "items_per_second": 299807 } ] } ``` The CSV format outputs comma-separated values. The `context` is output on stderr and the CSV itself on stdout.
Example CSV output looks like: ``` name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label "BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, "BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, "BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, ``` ## Output Files Write benchmark results to a file with the `--benchmark_out=<filename>` option (or set `BENCHMARK_OUT`). Specify the output format with `--benchmark_out_format={json|console|csv}` (or set `BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is deprecated and the saved `.csv` file [is not parsable](https://github.com/google/benchmark/issues/794) by csv parsers. Specifying `--benchmark_out` does not suppress the console output. ## Running Benchmarks Benchmarks are executed by running the produced binaries. Benchmark binaries, by default, accept options that may be specified either through their command line interface or by setting environment variables before execution. For every `--option_flag=<value>` CLI switch, a corresponding environment variable `OPTION_FLAG=<value>` exists and is used as default if set (CLI switches always prevail). A complete list of CLI options is available by running benchmarks with the `--help` switch. ### Dry runs To confirm that benchmarks can run successfully without needing to wait for multiple repetitions and iterations, the `--benchmark_dry_run` flag can be used. This will run the benchmarks as normal, but for 1 iteration and 1 repetition only. ## Running a Subset of Benchmarks The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>` environment variable) can be used to only run the benchmarks that match the specified `<regex>`.
For example: ```bash $ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 Run on (1 X 2300 MHz CPU ) 2016-06-25 19:34:24 Benchmark Time CPU Iterations ---------------------------------------------------- BM_memcpy/32 11 ns 11 ns 79545455 BM_memcpy/32k 2181 ns 2185 ns 324074 BM_memcpy/32 12 ns 12 ns 54687500 BM_memcpy/32k 1834 ns 1837 ns 357143 ``` ## Disabling Benchmarks It is possible to temporarily disable benchmarks by renaming the benchmark function to have the prefix "DISABLED_". This will cause the benchmark to be skipped at runtime. ## Result comparison It is possible to compare the benchmarking results. See [Additional Tooling Documentation](tools.md) ## Extra Context Sometimes it's useful to add extra context to the content printed before the results. By default this section includes information about the CPU on which the benchmarks are running. If you do want to add more context, you can use the `benchmark_context` command line flag: ```bash $ ./run_benchmarks --benchmark_context=pwd=`pwd` Run on (1 x 2300 MHz CPU) pwd: /home/user/benchmark/ Benchmark Time CPU Iterations ---------------------------------------------------- BM_memcpy/32 11 ns 11 ns 79545455 BM_memcpy/32k 2181 ns 2185 ns 324074 ``` You can get the same effect with the API: ```c++ benchmark::AddCustomContext("foo", "bar"); ``` Note that attempts to add a second value with the same key will fail with an error message. ## Runtime and Reporting Considerations When the benchmark binary is executed, each benchmark function is run serially. The number of iterations to run is determined dynamically by running the benchmark a few times and measuring the time taken and ensuring that the ultimate result will be statistically stable. As such, faster benchmark functions will be run for more iterations than slower benchmark functions, and the number of iterations is thus reported. In all cases, the number of iterations for which the benchmark is run is governed by the amount of time the benchmark takes. 
Concretely, the number of iterations is at least one, not more than 1e9, until CPU time is greater than the minimum time, or the wallclock time is 5x minimum time. The minimum time is set per benchmark by calling `MinTime` on the registered benchmark object. Furthermore warming up a benchmark might be necessary in order to get stable results because of e.g caching effects of the code under benchmark. Warming up means running the benchmark a given amount of time, before results are actually taken into account. The amount of time for which the warmup should be run can be set per benchmark by calling `MinWarmUpTime` on the registered benchmark object or for all benchmarks using the `--benchmark_min_warmup_time` command-line option. Note that `MinWarmUpTime` will overwrite the value of `--benchmark_min_warmup_time` for the single benchmark. How many iterations the warmup run of each benchmark takes is determined the same way as described in the paragraph above. Per default the warmup phase is set to 0 seconds and is therefore disabled. Average timings are then reported over the iterations run. If multiple repetitions are requested using the `--benchmark_repetitions` command-line option, or at registration time, the benchmark function will be run several times and statistical results across these repetitions will also be reported. As well as the per-benchmark entries, a preamble in the report will include information about the machine on which the benchmarks are run. ## Setup/Teardown Global setup/teardown specific to each benchmark can be done by passing a callback to Setup/Teardown: The setup/teardown callbacks will be invoked once for each benchmark. If the benchmark is multi-threaded (will run in k threads), they will be invoked exactly once before each run with k threads. If the benchmark uses different size groups of threads, the above will be true for each size group. 
E.g., ```c++ static void DoSetup(const benchmark::State& state) { } static void DoTeardown(const benchmark::State& state) { } static void BM_func(benchmark::State& state) {...} BENCHMARK(BM_func)->Arg(1)->Arg(3)->Threads(16)->Threads(32)->Setup(DoSetup)->Teardown(DoTeardown); ``` In this example, `DoSetup` and `DoTeardown` will be invoked 4 times each, specifically, once for each member of this family: - BM_func_Arg_1_Threads_16, BM_func_Arg_1_Threads_32 - BM_func_Arg_3_Threads_16, BM_func_Arg_3_Threads_32 ## Passing Arguments Sometimes a family of benchmarks can be implemented with just one routine that takes an extra argument to specify which one of the family of benchmarks to run. For example, the following code defines a family of benchmarks for measuring the speed of `memcpy()` calls of different lengths: ```c++ static void BM_memcpy(benchmark::State& state) { char* src = new char[state.range(0)]; char* dst = new char[state.range(0)]; memset(src, 'x', state.range(0)); for (auto _ : state) memcpy(dst, src, state.range(0)); state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range(0))); delete[] src; delete[] dst; } BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(8<<10); ``` The preceding code is quite repetitive, and can be replaced with the following short-hand. The following invocation will pick a few appropriate arguments in the specified range and will generate a benchmark for each such argument. ```c++ BENCHMARK(BM_memcpy)->Range(8, 8<<10); ``` By default the arguments in the range are generated in multiples of eight and the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the range multiplier is changed to multiples of two. ```c++ BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); ``` Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. The preceding code shows a method of defining a sparse range. The following example shows a method of defining a dense range.
It is then used to benchmark the performance of `std::vector` initialization for uniformly increasing sizes. ```c++ static void BM_DenseRange(benchmark::State& state) { for(auto _ : state) { std::vector<int> v(state.range(0), state.range(0)); auto data = v.data(); benchmark::DoNotOptimize(data); benchmark::ClobberMemory(); } } BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); ``` Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. You might have a benchmark that depends on two or more inputs. For example, the following code defines a family of benchmarks for measuring the speed of set insertion. ```c++ static void BM_SetInsert(benchmark::State& state) { std::set<int> data; for (auto _ : state) { state.PauseTiming(); data = ConstructRandomSet(state.range(0)); state.ResumeTiming(); for (int j = 0; j < state.range(1); ++j) data.insert(RandomNumber()); } } BENCHMARK(BM_SetInsert) ->Args({1<<10, 128}) ->Args({2<<10, 128}) ->Args({4<<10, 128}) ->Args({8<<10, 128}) ->Args({1<<10, 512}) ->Args({2<<10, 512}) ->Args({4<<10, 512}) ->Args({8<<10, 512}); ``` The preceding code is quite repetitive, and can be replaced with the following short-hand. The following macro will pick a few appropriate arguments in the product of the two specified ranges and will generate a benchmark for each such pair. ```c++ BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` Some benchmarks may require specific argument values that cannot be expressed with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a benchmark input for each combination in the product of the supplied vectors.
```c++ BENCHMARK(BM_SetInsert) ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) // would generate the same benchmark arguments as BENCHMARK(BM_SetInsert) ->Args({1<<10, 20}) ->Args({3<<10, 20}) ->Args({8<<10, 20}) ->Args({3<<10, 40}) ->Args({8<<10, 40}) ->Args({1<<10, 40}) ->Args({1<<10, 60}) ->Args({3<<10, 60}) ->Args({8<<10, 60}) ->Args({1<<10, 80}) ->Args({3<<10, 80}) ->Args({8<<10, 80}); ``` For the most common scenarios, helper methods for creating a list of integers for a given sparse or dense range are provided. ```c++ BENCHMARK(BM_SetInsert) ->ArgsProduct({ benchmark::CreateRange(8, 128, /*multi=*/2), benchmark::CreateDenseRange(1, 4, /*step=*/1) }) // would generate the same benchmark arguments as BENCHMARK(BM_SetInsert) ->ArgsProduct({ {8, 16, 32, 64, 128}, {1, 2, 3, 4} }); ``` For more complex patterns of inputs, passing a custom function to `Apply` allows programmatic specification of an arbitrary set of arguments on which to run the benchmark. The following example enumerates a dense range on one parameter, and a sparse range on the second. ```c++ static void CustomArguments(benchmark::Benchmark* b) { for (int i = 0; i <= 10; ++i) for (int j = 32; j <= 1024*1024; j *= 8) b->Args({i, j}); } BENCHMARK(BM_SetInsert)->Apply(CustomArguments); ``` ### Passing Arbitrary Arguments to a Benchmark It is possible to define a benchmark that takes an arbitrary number of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` macro creates a benchmark that invokes `func` with the `benchmark::State` as the first argument followed by the specified `args...`. The `test_case_name` is appended to the name of the benchmark and should describe the values passed. ```c++ template <class ...Args> void BM_takes_args(benchmark::State& state, Args&&... args) { auto args_tuple = std::make_tuple(std::move(args)...); for (auto _ : state) { std::cout << std::get<0>(args_tuple) << ": " << std::get<1>(args_tuple) << '\n'; [...]
} } // Registers a benchmark named "BM_takes_args/int_string_test" that passes // the specified values to `args`. BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); // Registers the same benchmark "BM_takes_args/int_test" that passes // the specified values to `args`. BENCHMARK_CAPTURE(BM_takes_args, int_test, 42, 43); ``` Note that elements of `...args` may refer to global variables. Users should avoid modifying global state inside of a benchmark. ### Naming a Benchmark Without Capturing Arguments If you only need to give a benchmark a custom name (without passing extra arguments), use `BENCHMARK_NAMED(func, test_case_name)`. Unlike `BENCHMARK_CAPTURE`, this macro does not create a lambda, which avoids compiler and linker scalability issues when registering thousands of benchmarks. ```c++ void BM_Foo(benchmark::State& state) { for (auto _ : state) {} } // Registers a benchmark named "BM_Foo/my_variant" BENCHMARK_NAMED(BM_Foo, my_variant); ``` Use `BENCHMARK_CAPTURE` when you need to pass extra arguments; use `BENCHMARK_NAMED` when you only need the name. ## Calculating Asymptotic Complexity (Big O) Asymptotic complexity might be calculated for a family of benchmarks. The following code will calculate the coefficient for the high-order term in the running time and the normalized root-mean square error of string comparison. ```c++ static void BM_StringCompare(benchmark::State& state) { std::string s1(state.range(0), '-'); std::string s2(state.range(0), '-'); for (auto _ : state) { auto comparison_result = s1.compare(s2); benchmark::DoNotOptimize(comparison_result); } state.SetComplexityN(state.range(0)); } BENCHMARK(BM_StringCompare) ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); ``` As shown in the following invocation, asymptotic complexity might also be calculated automatically. 
```c++ BENCHMARK(BM_StringCompare) ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); ``` The following code will specify asymptotic complexity with a lambda function, that might be used to customize high-order term calculation. ```c++ BENCHMARK(BM_StringCompare)->RangeMultiplier(2) ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); ``` ## Custom Benchmark Name You can change the benchmark's name as follows: ```c++ BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); ``` The invocation will execute the benchmark as before using `BM_memcpy` but changes the prefix in the report to `memcpy`. ## Templated Benchmarks This example produces and consumes messages of size `sizeof(v)` `range_x` times. It also outputs throughput in the absence of multiprogramming. ```c++ template void BM_Sequential(benchmark::State& state) { Q q; typename Q::value_type v; for (auto _ : state) { for (int i = state.range(0); i--; ) q.push(v); for (int e = state.range(0); e--; ) q.Wait(&v); } // actually messages, not bytes: state.SetBytesProcessed( static_cast(state.iterations())*state.range(0)); } // You can use the BENCHMARK macro with template parameters: BENCHMARK(BM_Sequential>)->Range(1<<0, 1<<10); // Old, legacy verbose C++03 syntax: BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); ``` Three macros are provided for adding benchmark templates. ```c++ #define BENCHMARK(func<...>) // Takes any number of parameters. #define BENCHMARK_TEMPLATE1(func, arg1) #define BENCHMARK_TEMPLATE2(func, arg1, arg2) ``` ## Templated Benchmarks that take arguments Sometimes there is a need to template benchmarks, and provide arguments to them. 
```c++ template void BM_Sequential_With_Step(benchmark::State& state, int step) { Q q; typename Q::value_type v; for (auto _ : state) { for (int i = state.range(0); i-=step; ) q.push(v); for (int e = state.range(0); e-=step; ) q.Wait(&v); } // actually messages, not bytes: state.SetBytesProcessed( static_cast(state.iterations())*state.range(0)); } BENCHMARK_TEMPLATE1_CAPTURE(BM_Sequential, WaitQueue, Step1, 1)->Range(1<<0, 1<<10); ``` ## Fixtures Fixture tests are created by first defining a type that derives from `::benchmark::Fixture` and then creating/registering the tests using the following macros: * `BENCHMARK_F(ClassName, Method)` * `BENCHMARK_DEFINE_F(ClassName, Method)` * `BENCHMARK_REGISTER_F(ClassName, Method)` For example: ```c++ class MyFixture : public benchmark::Fixture { public: void SetUp(::benchmark::State& state) { } void TearDown(::benchmark::State& state) { } }; // Defines and registers `FooTest` using the class `MyFixture`. BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { for (auto _ : st) { ... } } // Only defines `BarTest` using the class `MyFixture`. BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { for (auto _ : st) { ... } } // `BarTest` is NOT registered. BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); // `BarTest` is now registered. ``` ### Templated Fixtures You can also create templated fixtures by using the following macros: * `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` * `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` For example: ```c++ template class MyFixture : public benchmark::Fixture {}; // Defines and registers `IntTest` using the class template `MyFixture`. BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { for (auto _ : st) { ... } } // Only defines `DoubleTest` using the class template `MyFixture`. BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { for (auto _ : st) { ... } } // `DoubleTest` is NOT registered. 
BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); // `DoubleTest` is now registered. ``` If you want to use a method template for your fixtures, which you instantiate afterward, use the following macros: * `BENCHMARK_TEMPLATE_METHOD_F(ClassName, Method)` * `BENCHMARK_TEMPLATE_INSTANTIATE_F(ClassName, Method, ...)` With these macros you can define one method for several instantiations. Example (using `MyFixture` from above): ```c++ // Defines `Test` using the class template `MyFixture`. BENCHMARK_TEMPLATE_METHOD_F(MyFixture, Test)(benchmark::State& st) { for (auto _ : st) { ... } } // Instantiates and registers the benchmark `MyFixture::Test`. BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Test, int)->Threads(2); // Instantiates and registers the benchmark `MyFixture::Test`. BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Test, double)->Threads(4); ``` Inside the method definition of `BENCHMARK_TEMPLATE_METHOD_F` the type `Base` refers to the type of the instantiated fixture. Accesses to members of the fixture must be prefixed by `this->`. `BENCHMARK_TEMPLATE_METHOD_F` and `BENCHMARK_TEMPLATE_INSTANTIATE_F` can only be used if the fixture does not use non-type template parameters. If you want to pass values as template parameters, use e.g. `std::integral_constant`. For example: ```c++ template class SizedFixture : public benchmark::Fixture { static constexpr auto Size = Sz::value; int myValue; }; BENCHMARK_TEMPLATE_METHOD_F(SizedFixture, Test)(benchmark::State& st) { for (auto _ : st) { this->myValue = Base::Size; } } BENCHMARK_TEMPLATE_INSTANTIATE_F(SizedFixture, Test, std::integral_constant<5>)->Threads(2); ``` ## Custom Counters You can add your own counters with user-defined names. The example below will add columns "Foo", "Bar" and "Baz" in its output: ```c++ static void UserCountersExample1(benchmark::State& state) { double numFoos = 0, numBars = 0, numBazs = 0; for (auto _ : state) { // ... 
count Foo,Bar,Baz events } state.counters["Foo"] = numFoos; state.counters["Bar"] = numBars; state.counters["Baz"] = numBazs; } ``` The `state.counters` object is a `std::map` with `std::string` keys and `Counter` values. The latter is a `double`-like class, via an implicit conversion to `double&`. Thus you can use all of the standard arithmetic assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. The `Counter` constructor accepts three parameters: the value as a `double` ; a bit flag which allows you to show counters as rates, and/or as per-thread iteration, and/or as per-thread averages, and/or iteration invariants, and/or finally inverting the result; and a flag specifying the 'unit' - i.e. is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 (`benchmark::Counter::OneK::kIs1024`)? ```c++ // sets a simple counter state.counters["Foo"] = numFoos; // Set the counter as a rate. It will be presented divided // by the duration of the benchmark. // Meaning: per one second, how many 'foo's are processed? state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); // Set the counter as a rate. It will be presented divided // by the duration of the benchmark, and the result inverted. // Meaning: how many seconds it takes to process one 'foo'? state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); // Set the counter as a thread-average quantity. It will // be presented divided by the number of threads. 
state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); // There's also a combined flag: state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); // This says that we process with the rate of state.range(0) bytes every iteration: state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); ``` You can use `insert()` with `std::initializer_list`: ```c++ state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); // ... instead of: state.counters["Foo"] = numFoos; state.counters["Bar"] = numBars; state.counters["Baz"] = numBazs; ``` In multithreaded benchmarks, each counter is set on the calling thread only. When the benchmark finishes, the counters from each thread will be summed. Counters that are configured with `kIsRate`, will report the average rate across all threads, while `kAvgThreadsRate` counters will report the average rate per thread. ### Counter Reporting When using the console reporter, by default, user counters are printed at the end after the table, the same way as ``bytes_processed`` and ``items_processed``. This is best for cases in which there are few counters, or where there are only a couple of lines per benchmark. Here's an example of the default output: ``` ------------------------------------------------------------------------------ Benchmark Time CPU Iterations UserCounters... 
------------------------------------------------------------------------------ BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 BM_Factorial 26 ns 26 ns 26608979 40320 BM_Factorial/real_time 26 ns 26 ns 26587936 40320 BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 ``` If this doesn't suit you, you can print each counter as a table column by passing the flag `--benchmark_counters_tabular=true` to the benchmark application. This is best for cases in which there are a lot of counters, or a lot of lines per individual benchmark. Note that this will trigger a reprinting of the table header any time the counter set changes between individual benchmarks. 
Here's an example of corresponding output when `--benchmark_counters_tabular=true` is passed: ``` --------------------------------------------------------------------------------------- Benchmark Time CPU Iterations Bar Bat Baz Foo --------------------------------------------------------------------------------------- BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- BM_Factorial 26 ns 26 ns 26392245 40320 BM_Factorial/real_time 26 ns 26 ns 26494107 40320 BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 ``` Note above the additional header printed when the benchmark changes from ``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does not have the same counter set as ``BM_UserCounter``. 
## Multithreaded Benchmarks In a multithreaded test (benchmark invoked by multiple threads simultaneously), it is guaranteed that none of the threads will start until all have reached the start of the benchmark loop, and all will have finished before any thread exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` API.) As such, any global setup or teardown can be wrapped in a check against the thread index: ```c++ static void BM_MultiThreaded(benchmark::State& state) { if (state.thread_index() == 0) { // Setup code here. } for (auto _ : state) { // Run the test as normal. } if (state.thread_index() == 0) { // Teardown code here. } } BENCHMARK(BM_MultiThreaded)->Threads(2); ``` To run the benchmark across a range of thread counts, instead of `Threads`, use `ThreadRange`. This takes two parameters (`min_threads` and `max_threads`) and runs the benchmark once for values in the inclusive range. For example: ```c++ BENCHMARK(BM_MultiThreaded)->ThreadRange(1, 8); ``` will run `BM_MultiThreaded` with thread counts 1, 2, 4, and 8. If the benchmarked code itself uses threads and you want to compare it to single-threaded code, you may want to use real-time ("wallclock") measurements for latency comparisons: ```c++ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); ``` Without `UseRealTime`, CPU time is used by default. ### Manual Multithreaded Benchmarks Google/benchmark uses `std::thread` as its multithreading environment by default. If you want to use another multithreading environment (e.g. OpenMP), you can provide a factory function to your benchmark using the `ThreadRunner` function. The factory function takes the number of threads as an argument and creates a custom class derived from `benchmark::ThreadRunnerBase`. This custom class must override the function `void RunThreads(const std::function& fn)`. `RunThreads` is called by the main thread and spawns the requested number of threads. 
Each spawned thread must call `fn(thread_index)`, where `thread_index` is its own thread index. Before `RunThreads` returns, all spawned threads must be joined. ```c++ class OpenMPThreadRunner : public benchmark::ThreadRunnerBase { OpenMPThreadRunner(int num_threads) : num_threads_(num_threads) {} void RunThreads(const std::function& fn) final { #pragma omp parallel num_threads(num_threads_) fn(omp_get_thread_num()); } private: int num_threads_; }; BENCHMARK(BM_MultiThreaded) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1)->Threads(2)->Threads(4); ``` The above example creates a parallel OpenMP region before it enters `BM_MultiThreaded`. The actual benchmark code can remain the same and is therefore not tied to a specific thread runner. The measurement does not include the time for creating and joining the threads. ## CPU Timers By default, the CPU timer only measures the time spent by the main thread. If the benchmark itself uses threads internally, this measurement may not be what you are looking for. Instead, there is a way to measure the total CPU usage of the process, by all the threads. ```c++ void callee(int i); static void MyMain(int size) { #pragma omp parallel for for(int i = 0; i < size; i++) callee(i); } static void BM_OpenMP(benchmark::State& state) { for (auto _ : state) MyMain(state.range(0)); } // Measure the time spent by the main thread, use it to decide for how long to // run the benchmark loop. Depending on the internal implementation detail may // measure to anywhere from near-zero (the overhead spent before/after work // handoff to worker thread[s]) to the whole single-thread time. BENCHMARK(BM_OpenMP)->Range(8, 8<<10); // Measure the user-visible time, the wall clock (literally, the time that // has passed on the clock on the wall), use it to decide for how long to // run the benchmark loop. 
This will always be meaningful, and will match the // time spent by the main thread in single-threaded case, in general decreasing // with the number of internal threads doing the work. BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); // Measure the total CPU consumption, use it to decide for how long to // run the benchmark loop. This will always measure to no less than the // time spent by the main thread in single-threaded case. BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); // A mixture of the last two. Measure the total CPU consumption, but use the // wall clock to decide for how long to run the benchmark loop. BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); ``` ### Controlling Timers Normally, the entire duration of the work loop (`for (auto _ : state) {}`) is measured. But sometimes, it is necessary to do some work inside of that loop, every iteration, but without counting that time to the benchmark time. That is possible, although it is not recommended, since it has high overhead. ```c++ static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { std::set data; for (auto _ : state) { state.PauseTiming(); // Stop timers. They will not count until they are resumed. data = ConstructRandomSet(state.range(0)); // Do something that should not be measured state.ResumeTiming(); // And resume timers. They are now counting again. // The rest will be measured. for (int j = 0; j < state.range(1); ++j) data.insert(RandomNumber()); } } BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` ## Manual Timing For benchmarking something for which neither CPU time nor real-time are correct or accurate enough, completely manual timing is supported using the `UseManualTime` function. When `UseManualTime` is used, the benchmarked code must call `SetIterationTime` once per iteration of the benchmark loop to report the manually measured time. 
An example use case for this is benchmarking GPU execution (e.g. OpenCL or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot be accurately measured using CPU time or real-time. Instead, they can be measured accurately using a dedicated API, and these measurement results can be reported back with `SetIterationTime`. ```c++ static void BM_ManualTiming(benchmark::State& state) { int microseconds = state.range(0); std::chrono::duration sleep_duration { static_cast(microseconds) }; for (auto _ : state) { auto start = std::chrono::high_resolution_clock::now(); // Simulate some useful workload with a sleep std::this_thread::sleep_for(sleep_duration); auto end = std::chrono::high_resolution_clock::now(); auto elapsed_seconds = std::chrono::duration_cast>( end - start); state.SetIterationTime(elapsed_seconds.count()); } } BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); ``` ## Setting the Time Unit If a benchmark runs for a few milliseconds, it may be hard to visually compare the measured times, since the output data is given in nanoseconds by default. To override this, you can set the time unit explicitly: ```c++ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); ``` Additionally the default time unit can be set globally with the `--benchmark_time_unit={ns|us|ms|s}` command line argument. The argument only affects benchmarks where the time unit is not set explicitly. ## Preventing Optimization To prevent a value or expression from being optimized away by the compiler the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` functions can be used. ```c++ static void BM_test(benchmark::State& state) { for (auto _ : state) { int x = 0; for (int i=0; i < 64; ++i) { benchmark::DoNotOptimize(x += i); } } } ``` `DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either memory or a register. For GNU based compilers it acts as a read/write barrier for global memory. 
More specifically it forces the compiler to flush pending writes to memory and reload any other values as necessary. Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>` in any way. `<expr>` may even be removed entirely when the result is already known. For example: ```c++ // Example 1: `<expr>` is removed entirely. int foo(int x) { return x + 42; } while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); // Example 2: Result of '<expr>' is only reused. int bar(int) __attribute__((const)); while (...) DoNotOptimize(bar(0)); // Optimized to: // int __result__ = bar(0); // while (...) DoNotOptimize(__result__); ``` The second tool for preventing optimizations is `ClobberMemory()`. In essence `ClobberMemory()` forces the compiler to perform all pending writes to global memory. Memory managed by block scope objects must be "escaped" using `DoNotOptimize(...)` before it can be clobbered. In the below example `ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized away. ```c++ static void BM_vector_push_back(benchmark::State& state) { for (auto _ : state) { std::vector v; v.reserve(1); auto data = v.data(); // Allow v.data() to be clobbered. Pass as non-const benchmark::DoNotOptimize(data); // lvalue to avoid undesired compiler optimizations v.push_back(42); benchmark::ClobberMemory(); // Force 42 to be written to memory. } } ``` Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. ## Statistics: Reporting the Mean, Median and Standard Deviation / Coefficient of variation of Repeated Benchmarks By default each benchmark is run once and that single result is reported. However benchmarks are often noisy and a single result may not be representative of the overall behavior. For this reason it's possible to repeatedly rerun the benchmark. The number of runs of each benchmark is specified globally by the `--benchmark_repetitions` flag or on a per benchmark basis by calling `Repetitions` on the registered benchmark object. 
When a benchmark is run more than once, the mean, median, standard deviation and coefficient of variation of the runs will be reported. Additionally the `--benchmark_report_aggregates_only={true|false}`, `--benchmark_display_aggregates_only={true|false}` flags or `ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be used to change how repeated tests are reported. By default the result of each repeated run is reported. When the `report aggregates only` option is `true`, only the aggregates (i.e. mean, median, standard deviation and coefficient of variation, maybe complexity measurements if they were requested) of the runs are reported to both reporters: standard output (console) and the file. However, when only the `display aggregates only` option is `true`, only the aggregates are displayed in the standard output, while the file output still contains everything. Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a registered benchmark object overrides the value of the appropriate flag for that benchmark. ## Custom Statistics While having these aggregates is nice, this may not be enough for everyone. For example you may want to know what the largest observation is, e.g. because you have some real-time constraints. This is easy. The following code will specify a custom statistic to be calculated, defined by a lambda function. 
```c++ void BM_spin_empty(benchmark::State& state) { for (auto _ : state) { for (int x = 0; x < state.range(0); ++x) { benchmark::DoNotOptimize(x); } } } BENCHMARK(BM_spin_empty) ->Repetitions(3) // or add option --benchmark_repetitions=3 ->ComputeStatistics("max", [](const std::vector& v) -> double { return *(std::max_element(std::begin(v), std::end(v))); }) ->Arg(512); ``` While usually the statistics produce values in time units, you can also produce percentages: ```c++ void BM_spin_empty(benchmark::State& state) { for (auto _ : state) { for (int x = 0; x < state.range(0); ++x) { benchmark::DoNotOptimize(x); } } } BENCHMARK(BM_spin_empty) ->Repetitions(3) // or add option --benchmark_repetitions=3 ->ComputeStatistics("ratio", [](const std::vector& v) -> double { return v.front() / v.back(); }, benchmark::StatisticUnit::kPercentage) ->Arg(512); ``` ## Memory Usage It's often useful to also track memory usage for benchmarks, alongside CPU performance. For this reason, benchmark offers the `RegisterMemoryManager` method that allows a custom `MemoryManager` to be injected. If set, the `MemoryManager::Start` and `MemoryManager::Stop` methods will be called at the start and end of benchmark runs to allow user code to fill out a report on the number of allocations, bytes used, etc. This data will then be reported alongside other performance data, currently only when using JSON output. ## Profiling It's often useful to also profile benchmarks in particular ways, in addition to CPU performance. For this reason, benchmark offers the `RegisterProfilerManager` method that allows a custom `ProfilerManager` to be injected. If set, the `ProfilerManager::AfterSetupStart` and `ProfilerManager::BeforeTeardownStop` methods will be called at the start and end of a separate benchmark run to allow user code to collect and report user-provided profile metrics. Output collected from this profiling run must be reported separately. ## Using RegisterBenchmark(name, fn, args...) 
The `RegisterBenchmark(name, func, args...)` function provides an alternative way to create and register benchmarks. `RegisterBenchmark(name, func, args...)` creates, registers, and returns a pointer to a new benchmark with the specified `name` that invokes `func(st, args...)` where `st` is a `benchmark::State` object. Unlike the `BENCHMARK` registration macros, which can only be used at the global scope, `RegisterBenchmark` can be called anywhere. This allows for benchmark tests to be registered programmatically. Additionally, `RegisterBenchmark` allows any callable object to be registered as a benchmark, including capturing lambdas and function objects. For example: ```c++ auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; int main(int argc, char** argv) { benchmark::MaybeReenterWithoutASLR(argc, argv); for (auto& test_input : { /* ... */ }) benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); } ``` ## Exiting with an Error When errors caused by external influences, such as file I/O and network communication, occur within a benchmark, the `State::SkipWithError(const std::string& msg)` function can be used to skip that run of the benchmark and report the error. Note that only future iterations of the `KeepRunning()` loop are skipped. For the ranged-for version of the benchmark loop, users must explicitly exit the loop; otherwise all iterations will be performed. Users may explicitly return to exit the benchmark immediately. The `SkipWithError(...)` function may be used at any point within the benchmark, including before and after the benchmark loop. Moreover, if `SkipWithError(...)` has been used, it is not required to reach the benchmark loop and one may return from the benchmark function early. 
For example: ```c++ static void BM_test(benchmark::State& state) { auto resource = GetResource(); if (!resource.good()) { state.SkipWithError("Resource is not good!"); // KeepRunning() loop will not be entered. } while (state.KeepRunning()) { auto data = resource.read_data(); if (!resource.good()) { state.SkipWithError("Failed to read data!"); break; // Needed to skip the rest of the iteration. } do_stuff(data); } } static void BM_test_ranged_fo(benchmark::State & state) { auto resource = GetResource(); if (!resource.good()) { state.SkipWithError("Resource is not good!"); return; // Early return is allowed when SkipWithError() has been used. } for (auto _ : state) { auto data = resource.read_data(); if (!resource.good()) { state.SkipWithError("Failed to read data!"); break; // REQUIRED to prevent all further iterations. } do_stuff(data); } } ``` ## A Faster KeepRunning Loop A range-based for loop should be used in preference to the `KeepRunning` loop for running the benchmarks. For example: ```c++ static void BM_Fast(benchmark::State &state) { for (auto _ : state) { FastOperation(); } } BENCHMARK(BM_Fast); ``` The ranged-for loop is faster than `KeepRunning` because `KeepRunning` requires a memory load and store of the iteration count every iteration, whereas the ranged-for variant is able to keep the iteration count in a register. 
For example, an empty inner loop of using the ranged-based for method looks like: ```asm # Loop Init mov rbx, qword ptr [r14 + 104] call benchmark::State::StartKeepRunning() test rbx, rbx je .LoopEnd .LoopHeader: # =>This Inner Loop Header: Depth=1 add rbx, -1 jne .LoopHeader .LoopEnd: ``` Compared to an empty `KeepRunning` loop, which looks like: ```asm .LoopHeader: # in Loop: Header=BB0_3 Depth=1 cmp byte ptr [rbx], 1 jne .LoopInit .LoopBody: # =>This Inner Loop Header: Depth=1 mov rax, qword ptr [rbx + 8] lea rcx, [rax + 1] mov qword ptr [rbx + 8], rcx cmp rax, qword ptr [rbx + 104] jb .LoopHeader jmp .LoopEnd .LoopInit: mov rdi, rbx call benchmark::State::StartKeepRunning() jmp .LoopBody .LoopEnd: ``` Unless C++03 compatibility is required, the ranged-for variant of writing the benchmark loop should be preferred. ## Disabling CPU Frequency Scaling If you see this error: ``` ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ``` you might want to disable the CPU frequency scaling while running the benchmark, as well as consider other ways to stabilize the performance of your system while benchmarking. See [Reducing Variance](reducing_variance.md) for more information. ================================================ FILE: include/benchmark/benchmark.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_BENCHMARK_H_ #define BENCHMARK_BENCHMARK_H_ #include "benchmark/benchmark_api.h" #include "benchmark/counter.h" #include "benchmark/macros.h" #include "benchmark/managers.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" #include "benchmark/statistics.h" #include "benchmark/sysinfo.h" #include "benchmark/types.h" #include "benchmark/utils.h" #endif // BENCHMARK_BENCHMARK_H_ ================================================ FILE: include/benchmark/benchmark_api.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_BENCHMARK_API_H_ #define BENCHMARK_BENCHMARK_API_H_ #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4251) #endif #include #include #include #include #include #include "benchmark/counter.h" #include "benchmark/macros.h" #include "benchmark/state.h" #include "benchmark/statistics.h" #include "benchmark/types.h" namespace benchmark { const char kDefaultMinTimeStr[] = "0.5s"; BENCHMARK_EXPORT void MaybeReenterWithoutASLR(int, char**); BENCHMARK_EXPORT std::string GetBenchmarkVersion(); BENCHMARK_EXPORT void PrintDefaultHelp(); BENCHMARK_EXPORT void Initialize(int* argc, char** argv, void (*HelperPrintf)() = PrintDefaultHelp); BENCHMARK_EXPORT void Shutdown(); BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv); BENCHMARK_EXPORT std::string GetBenchmarkFilter(); BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value); BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity(); BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter(); BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(); BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec); BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec); BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks( BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter); BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter, std::string spec); BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit(); BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit); BENCHMARK_EXPORT void AddCustomContext(std::string key, std::string value); struct ThreadRunnerBase { virtual ~ThreadRunnerBase() {} virtual void RunThreads(const std::function& fn) = 0; }; using threadrunner_factory = std::function(int)>; namespace internal { class BenchmarkFamilies; class BenchmarkInstance; } // namespace 
internal class BENCHMARK_EXPORT Benchmark { public: virtual ~Benchmark(); Benchmark* Name(const std::string& name); Benchmark* Arg(int64_t x); Benchmark* Unit(TimeUnit unit); Benchmark* Range(int64_t start, int64_t limit); Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1); Benchmark* Args(const std::vector& args); Benchmark* ArgPair(int64_t x, int64_t y) { std::vector args; args.push_back(x); args.push_back(y); return Args(args); } Benchmark* Ranges(const std::vector>& ranges); Benchmark* ArgsProduct(const std::vector>& arglists); Benchmark* ArgName(const std::string& name); Benchmark* ArgNames(const std::vector& names); Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) { std::vector> ranges; ranges.push_back(std::make_pair(lo1, hi1)); ranges.push_back(std::make_pair(lo2, hi2)); return Ranges(ranges); } Benchmark* Setup(callback_function&&); Benchmark* Setup(const callback_function&); Benchmark* Teardown(callback_function&&); Benchmark* Teardown(const callback_function&); Benchmark* Apply(const std::function&); Benchmark* RangeMultiplier(int multiplier); Benchmark* MinTime(double t); Benchmark* MinWarmUpTime(double t); Benchmark* Iterations(IterationCount n); Benchmark* Repetitions(int n); Benchmark* ReportAggregatesOnly(bool value = true); Benchmark* DisplayAggregatesOnly(bool value = true); Benchmark* MeasureProcessCPUTime(); Benchmark* UseRealTime(); Benchmark* UseManualTime(); Benchmark* Complexity(BigO complexity = benchmark::oAuto); Benchmark* Complexity(BigOFunc* complexity); Benchmark* ComputeStatistics(const std::string& name, StatisticsFunc* statistics, StatisticUnit unit = kTime); Benchmark* Threads(int t); Benchmark* ThreadRange(int min_threads, int max_threads); Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1); Benchmark* ThreadPerCpu(); Benchmark* ThreadRunner(threadrunner_factory&& factory); virtual void Run(State& state) = 0; TimeUnit GetTimeUnit() const; protected: explicit 
Benchmark(const std::string& name); void SetName(const std::string& name); public: const char* GetName() const; int ArgsCnt() const; const char* GetArgName(int arg) const; private: friend class internal::BenchmarkFamilies; friend class internal::BenchmarkInstance; std::string name_; internal::AggregationReportMode aggregation_report_mode_; std::vector arg_names_; std::vector> args_; TimeUnit time_unit_; bool use_default_time_unit_; int range_multiplier_; double min_time_; double min_warmup_time_; IterationCount iterations_; int repetitions_; bool measure_process_cpu_time_; bool use_real_time_; bool use_manual_time_; BigO complexity_; BigOFunc* complexity_lambda_; std::vector statistics_; std::vector thread_counts_; callback_function setup_; callback_function teardown_; threadrunner_factory threadrunner_; BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark); }; namespace internal { typedef BENCHMARK_DEPRECATED_MSG( "Use ::benchmark::Benchmark instead")::benchmark::Benchmark Benchmark; typedef BENCHMARK_DEPRECATED_MSG( "Use ::benchmark::threadrunner_factory instead")::benchmark:: threadrunner_factory threadrunner_factory; typedef void(Function)(State&); BENCHMARK_EXPORT ::benchmark::Benchmark* RegisterBenchmarkInternal( std::unique_ptr<::benchmark::Benchmark>); BENCHMARK_EXPORT std::map*& GetGlobalContext(); BENCHMARK_EXPORT void UseCharPointer(char const volatile*); BENCHMARK_EXPORT int InitializeStreams(); BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); } // namespace internal Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn); template Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn); BENCHMARK_EXPORT void ClearRegisteredBenchmarks(); namespace internal { class BENCHMARK_EXPORT FunctionBenchmark : public benchmark::Benchmark { public: FunctionBenchmark(const std::string& name, Function* func) : Benchmark(name), func_(func) {} void Run(State& st) override; private: Function* func_; }; template class 
LambdaBenchmark : public benchmark::Benchmark { public: void Run(State& st) override { lambda_(st); } template LambdaBenchmark(const std::string& name, OLambda&& lam) : Benchmark(name), lambda_(std::forward(lam)) {} private: LambdaBenchmark(LambdaBenchmark const&) = delete; Lambda lambda_; }; } // namespace internal inline Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn) { return internal::RegisterBenchmarkInternal( ::benchmark::internal::make_unique(name, fn)); } template Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) { using BenchType = internal::LambdaBenchmark::type>; return internal::RegisterBenchmarkInternal( ::benchmark::internal::make_unique(name, std::forward(fn))); } template Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn, Args&&... args) { return benchmark::RegisterBenchmark( name, [=](benchmark::State& st) { fn(st, args...); }); } class Fixture : public Benchmark { public: Fixture() : Benchmark("") {} void Run(State& st) override { this->SetUp(st); this->BenchmarkCase(st); this->TearDown(st); } virtual void SetUp(const State&) {} virtual void TearDown(const State&) {} virtual void SetUp(State& st) { SetUp(const_cast(st)); } virtual void TearDown(State& st) { TearDown(const_cast(st)); } protected: virtual void BenchmarkCase(State&) = 0; }; BENCHMARK_EXPORT std::vector CreateRange(int64_t lo, int64_t hi, int multi); BENCHMARK_EXPORT std::vector CreateDenseRange(int64_t start, int64_t limit, int step); } // namespace benchmark #if defined(_MSC_VER) #pragma warning(pop) #endif #endif // BENCHMARK_BENCHMARK_API_H_ ================================================ FILE: include/benchmark/counter.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef BENCHMARK_COUNTER_H_ #define BENCHMARK_COUNTER_H_ #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4251) #endif #include #include #include "benchmark/macros.h" #include "benchmark/types.h" namespace benchmark { class BENCHMARK_EXPORT Counter { public: enum Flags { kDefaults = 0, kIsRate = 1 << 0, kAvgThreads = 1 << 1, kAvgThreadsRate = kIsRate | kAvgThreads, kIsIterationInvariant = 1 << 2, kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, kAvgIterations = 1 << 3, kAvgIterationsRate = kIsRate | kAvgIterations, kInvert = 1 << 31 }; enum OneK { kIs1000 = 1000, kIs1024 = 1024 }; double value; Flags flags; OneK oneK; BENCHMARK_ALWAYS_INLINE Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) : value(v), flags(f), oneK(k) {} BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } BENCHMARK_ALWAYS_INLINE operator double&() { return value; } }; Counter::Flags inline operator|(const Counter::Flags& LHS, const Counter::Flags& RHS) { return static_cast(static_cast(LHS) | static_cast(RHS)); } using UserCounters = std::map; namespace internal { void Finish(UserCounters* l, IterationCount iterations, double cpu_time, double num_threads); void Increment(UserCounters* l, UserCounters const& r); bool SameNames(UserCounters const& l, UserCounters const& r); } // namespace internal } // namespace benchmark #if defined(_MSC_VER) #pragma warning(pop) #endif #endif // BENCHMARK_COUNTER_H_ ================================================ FILE: include/benchmark/export.h 
================================================ #ifndef BENCHMARK_EXPORT_H #define BENCHMARK_EXPORT_H #if defined(_WIN32) #define EXPORT_ATTR __declspec(dllexport) #define IMPORT_ATTR __declspec(dllimport) #define NO_EXPORT_ATTR #define DEPRECATED_ATTR __declspec(deprecated) #else // _WIN32 #define EXPORT_ATTR __attribute__((visibility("default"))) #define IMPORT_ATTR __attribute__((visibility("default"))) #define NO_EXPORT_ATTR __attribute__((visibility("hidden"))) #define DEPRECATE_ATTR __attribute__((__deprecated__)) #endif // _WIN32 #ifdef BENCHMARK_STATIC_DEFINE #define BENCHMARK_EXPORT #define BENCHMARK_NO_EXPORT #else // BENCHMARK_STATIC_DEFINE #ifndef BENCHMARK_EXPORT #ifdef benchmark_EXPORTS /* We are building this library */ #define BENCHMARK_EXPORT EXPORT_ATTR #else // benchmark_EXPORTS /* We are using this library */ #define BENCHMARK_EXPORT IMPORT_ATTR #endif // benchmark_EXPORTS #endif // !BENCHMARK_EXPORT #ifndef BENCHMARK_NO_EXPORT #define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR #endif // !BENCHMARK_NO_EXPORT #endif // BENCHMARK_STATIC_DEFINE #ifndef BENCHMARK_DEPRECATED #define BENCHMARK_DEPRECATED DEPRECATE_ATTR #endif // BENCHMARK_DEPRECATED #ifndef BENCHMARK_DEPRECATED_EXPORT #define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED #endif // BENCHMARK_DEPRECATED_EXPORT #ifndef BENCHMARK_DEPRECATED_NO_EXPORT #define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED #endif // BENCHMARK_DEPRECATED_EXPORT #endif /* BENCHMARK_EXPORT_H */ ================================================ FILE: include/benchmark/macros.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Compiler-portability macros shared by the public headers.
#ifndef BENCHMARK_MACROS_H_
#define BENCHMARK_MACROS_H_

#if defined(_MSC_VER)
// NOTE(review): the header name is missing here (presumably <intrin.h>) —
// confirm against the upstream file.
#include
#endif

// NOTE(review): the header name is missing here as well — confirm.
#include

// Deletes the copy constructor and copy assignment operator of TypeName.
// The expansion has no trailing semicolon; the use site supplies it.
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;                \
  TypeName& operator=(const TypeName&) = delete

// "Possibly unused" marker: the standard attribute when BENCHMARK_HAS_CXX17
// is defined, the GNU attribute on GCC/clang, otherwise empty.
#ifdef BENCHMARK_HAS_CXX17
#define BENCHMARK_UNUSED [[maybe_unused]]
#elif defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_UNUSED __attribute__((unused))
#else
#define BENCHMARK_UNUSED
#endif

// Per-function optimisation opt-out. clang is tested first because it also
// defines __GNUC__ but uses a different attribute spelling.
#if defined(__clang__)
#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
#elif defined(__GNUC__) || defined(__GNUG__)
#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
#else
#define BENCHMARK_DONT_OPTIMIZE
#endif

// Forced inlining. The MSVC branch additionally maps __func__ onto
// __FUNCTION__.
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER) && !defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __forceinline
#define __func__ __FUNCTION__
#else
#define BENCHMARK_ALWAYS_INLINE
#endif

// Two-level stringification so that macro arguments (e.g. __LINE__) are
// expanded before being turned into a string literal.
#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)

// Branch-prediction hint, deprecation attribute, and a push/pop pair that
// silences deprecation warnings, selected per compiler family:
// GCC/clang (excluding NVCC and NVCOMPILER), then NVCOMPILER, then MSVC,
// then a fallback.
#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || \
    defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
  _Pragma("GCC diagnostic push")             \
  _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
#elif defined(__NVCOMPILER)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
  _Pragma("diagnostic push")                 \
  _Pragma("diag_suppress deprecated_entity_with_custom_message")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
#elif defined(_MSC_VER)
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg) __declspec(deprecated(msg))
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
  __pragma(warning(push)) __pragma(warning(disable : 4996))
#define BENCHMARK_RESTORE_DEPRECATED_WARNING __pragma(warning(pop))
#else
// Fallback: no branch hint, no deprecation attribute.
// NOTE(review): BENCHMARK_WARNING_MSG below uses __pragma, the same spelling
// as the _MSC_VER branch, and the first two branches do not define the macro
// at all — confirm this is intended.
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING
#define BENCHMARK_RESTORE_DEPRECATED_WARNING
#endif

// GCC version as major * 100 + minor; not defined under clang.
#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif

// Lets the __has_builtin test below be written unconditionally.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Marks a point that control flow never reaches; a no-op expression when no
// compiler intrinsic is available.
#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define BENCHMARK_UNREACHABLE() __assume(false)
#else
#define BENCHMARK_UNREACHABLE() ((void)0)
#endif

// Cache-line size per target architecture (only probed on GNU-compatible
// compilers); anything not matched here falls through to the default of 64
// below, including __arm__ targets other than the two listed.
#if defined(__GNUC__)
#if defined(__i386__) || defined(__x86_64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#elif defined(__arm__)
#if defined(__ARM_ARCH_5T__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
#elif defined(__ARM_ARCH_7A__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#endif
#endif
#endif

#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#endif

// Alignment attribute using the size chosen above; empty on compilers that
// are neither GNU-compatible nor MSVC.
#if defined(__GNUC__)
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
  __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
#elif defined(_MSC_VER)
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
  __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
#else
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
#endif

#endif  // BENCHMARK_MACROS_H_

================================================
FILE: include/benchmark/managers.h
================================================
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_MANAGERS_H_ #define BENCHMARK_MANAGERS_H_ #include #include #include "benchmark/macros.h" #include "benchmark/types.h" namespace benchmark { class MemoryManager { public: static constexpr int64_t TombstoneValue = std::numeric_limits::max(); struct Result { Result() : num_allocs(0), max_bytes_used(0), total_allocated_bytes(TombstoneValue), net_heap_growth(TombstoneValue), memory_iterations(0) {} int64_t num_allocs; int64_t max_bytes_used; int64_t total_allocated_bytes; int64_t net_heap_growth; IterationCount memory_iterations; }; virtual ~MemoryManager() {} virtual void Start() = 0; virtual void Stop(Result& result) = 0; }; BENCHMARK_EXPORT void RegisterMemoryManager(MemoryManager* memory_manager); class ProfilerManager { public: virtual ~ProfilerManager() {} virtual void AfterSetupStart() = 0; virtual void BeforeTeardownStop() = 0; }; BENCHMARK_EXPORT void RegisterProfilerManager(ProfilerManager* profiler_manager); } // namespace benchmark #endif // BENCHMARK_MANAGERS_H_ ================================================ FILE: include/benchmark/registration.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_REGISTRATION_H_ #define BENCHMARK_REGISTRATION_H_ #include "benchmark/benchmark_api.h" #include "benchmark/macros.h" #if defined(__clang__) #define BENCHMARK_DISABLE_COUNTER_WARNING \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wunknown-warning-option\"") \ _Pragma("GCC diagnostic ignored \"-Wc2y-extensions\"") #define BENCHMARK_RESTORE_COUNTER_WARNING _Pragma("GCC diagnostic pop") #else #define BENCHMARK_DISABLE_COUNTER_WARNING #define BENCHMARK_RESTORE_COUNTER_WARNING #endif BENCHMARK_DISABLE_COUNTER_WARNING #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ #else #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ #endif BENCHMARK_RESTORE_COUNTER_WARNING #define BENCHMARK_PRIVATE_NAME(...) \ BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \ __VA_ARGS__) #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c #define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ BaseClass##_##Method##_Benchmark #define BENCHMARK_PRIVATE_DECLARE(n) \ BENCHMARK_DISABLE_COUNTER_WARNING \ static ::benchmark::Benchmark const* const BENCHMARK_PRIVATE_NAME(n) \ BENCHMARK_RESTORE_COUNTER_WARNING BENCHMARK_UNUSED #define BENCHMARK(...) \ BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #__VA_ARGS__, \ static_cast<::benchmark::internal::Function*>(__VA_ARGS__)))) #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)}) #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t)) #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}}) #define BENCHMARK_CAPTURE(func, test_case_name, ...) 
\ BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #func "/" #test_case_name, \ [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) #define BENCHMARK_NAMED(func, test_case_name) \ BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #func "/" #test_case_name, \ static_cast<::benchmark::internal::Function*>(func)))) #define BENCHMARK_TEMPLATE1(n, a) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #n "<" #a ">", \ static_cast<::benchmark::internal::Function*>(n)))) #define BENCHMARK_TEMPLATE2(n, a, b) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #n "<" #a "," #b ">", \ static_cast<::benchmark::internal::Function*>(n)))) #define BENCHMARK_TEMPLATE(n, ...) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #n "<" #__VA_ARGS__ ">", \ static_cast<::benchmark::internal::Function*>(n<__VA_ARGS__>)))) #define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \ BENCHMARK_CAPTURE(func, test_case_name, __VA_ARGS__) #define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) 
\ BENCHMARK_PRIVATE_DECLARE(func) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique< \ ::benchmark::internal::FunctionBenchmark>( \ #func "<" #a "," #b ">" \ "/" #test_case_name, \ [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "/" #Method); \ } \ \ protected: \ void BenchmarkCase(::benchmark::State&) override; \ }; #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #a ">/" #Method); \ } \ \ protected: \ void BenchmarkCase(::benchmark::State&) override; \ }; #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ } \ \ protected: \ void BenchmarkCase(::benchmark::State&) override; \ }; #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) 
\ class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ public: \ BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ } \ \ protected: \ void BenchmarkCase(::benchmark::State&) override; \ }; #define BENCHMARK_DEFINE_F(BaseClass, Method) \ BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \ BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \ BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \ BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_REGISTER_F(BaseClass, Method) \ BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)) #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ BENCHMARK_PRIVATE_DECLARE(TestName) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique())) #define BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \ BaseClass##_##Method##_BenchmarkTemplate #define BENCHMARK_TEMPLATE_METHOD_F(BaseClass, Method) \ template \ class BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \ : public BaseClass { \ protected: \ using Base = BaseClass; \ void BenchmarkCase(::benchmark::State&) override; \ }; \ template \ void BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \ BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F(BaseClass, Method, \ UniqueName, ...) 
\ class UniqueName : public BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \ BaseClass, Method)<__VA_ARGS__> { \ public: \ UniqueName() { this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); } \ }; \ BENCHMARK_PRIVATE_DECLARE(BaseClass##_##Method##_Benchmark) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ ::benchmark::internal::make_unique())) #define BENCHMARK_TEMPLATE_INSTANTIATE_F(BaseClass, Method, ...) \ BENCHMARK_DISABLE_COUNTER_WARNING \ BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F( \ BaseClass, Method, BENCHMARK_PRIVATE_NAME(BaseClass##Method), \ __VA_ARGS__) \ BENCHMARK_RESTORE_COUNTER_WARNING #define BENCHMARK_F(BaseClass, Method) \ BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \ BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \ BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) 
\ BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_MAIN() \ int main(int argc, char** argv) { \ benchmark::MaybeReenterWithoutASLR(argc, argv); \ char arg0_default[] = "benchmark"; \ char* args_default = reinterpret_cast(arg0_default); \ if (!argv) { \ argc = 1; \ argv = &args_default; \ } \ ::benchmark::Initialize(&argc, argv); \ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ ::benchmark::RunSpecifiedBenchmarks(); \ ::benchmark::Shutdown(); \ return 0; \ } \ int main(int, char**) #endif // BENCHMARK_REGISTRATION_H_ ================================================ FILE: include/benchmark/reporter.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_REPORTER_H_ #define BENCHMARK_REPORTER_H_ #if defined(_MSC_VER) #pragma warning(push) // C4251: needs to have dll-interface to be used by clients of class #pragma warning(disable : 4251) #endif #include #include #include #include #include #include "benchmark/counter.h" #include "benchmark/macros.h" #include "benchmark/managers.h" #include "benchmark/statistics.h" #include "benchmark/sysinfo.h" #include "benchmark/types.h" namespace benchmark { struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; std::string min_warmup_time; std::string iterations; std::string repetitions; std::string time_type; std::string threads; std::string str() const; }; class BENCHMARK_EXPORT BenchmarkReporter { public: struct Context { CPUInfo const& cpu_info; SystemInfo const& sys_info; size_t name_field_width = 0; static const char* executable_name; Context(); }; struct BENCHMARK_EXPORT Run { static const int64_t no_repetition_index = -1; enum RunType { RT_Iteration, RT_Aggregate }; Run() : run_type(RT_Iteration), aggregate_unit(kTime), skipped(internal::NotSkipped), iterations(1), threads(1), time_unit(kNanosecond), real_accumulated_time(0), cpu_accumulated_time(0), max_heapbytes_used(0), use_real_time_for_initial_big_o(false), complexity(oNone), complexity_lambda(), complexity_n(0), statistics(), report_big_o(false), report_rms(false), allocs_per_iter(0.0) {} std::string benchmark_name() const; BenchmarkName run_name; int64_t family_index; int64_t per_family_instance_index; RunType run_type; std::string aggregate_name; StatisticUnit aggregate_unit; std::string report_label; internal::Skipped skipped; std::string skip_message; IterationCount iterations; int64_t threads; int64_t repetition_index; int64_t repetitions; TimeUnit time_unit; double real_accumulated_time; double cpu_accumulated_time; double GetAdjustedRealTime() const; double GetAdjustedCPUTime() const; double max_heapbytes_used; bool 
use_real_time_for_initial_big_o; BigO complexity; BigOFunc* complexity_lambda; ComplexityN complexity_n; const std::vector* statistics; bool report_big_o; bool report_rms; UserCounters counters; MemoryManager::Result memory_result; double allocs_per_iter; }; struct PerFamilyRunReports { PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {} int num_runs_total; int num_runs_done; std::vector Runs; }; BenchmarkReporter(); virtual bool ReportContext(const Context& context) = 0; virtual void ReportRunsConfig(double /*min_time*/, bool /*has_explicit_iters*/, IterationCount /*iters*/) {} virtual void ReportRuns(const std::vector& report) = 0; virtual void Finalize() {} void SetOutputStream(std::ostream* out) { assert(out); output_stream_ = out; } void SetErrorStream(std::ostream* err) { assert(err); error_stream_ = err; } std::ostream& GetOutputStream() const { return *output_stream_; } std::ostream& GetErrorStream() const { return *error_stream_; } virtual ~BenchmarkReporter(); static void PrintBasicContext(std::ostream* out, Context const& context); private: std::ostream* output_stream_; std::ostream* error_stream_; }; class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter { public: enum OutputOptions { OO_None = 0, OO_Color = 1, OO_Tabular = 2, OO_ColorTabular = OO_Color | OO_Tabular, OO_Defaults = OO_ColorTabular }; explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) : output_options_(opts_), name_field_width_(0), printed_header_(false) {} bool ReportContext(const Context& context) override; void ReportRuns(const std::vector& reports) override; protected: virtual void PrintRunData(const Run& result); virtual void PrintHeader(const Run& run); OutputOptions output_options_; size_t name_field_width_; UserCounters prev_counters_; bool printed_header_; }; class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter { public: JSONReporter() : first_report_(true) {} bool ReportContext(const Context& context) override; void ReportRuns(const 
std::vector& reports) override; void Finalize() override; private: void PrintRunData(const Run& run); bool first_report_; }; class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG( "The CSV Reporter will be removed in a future release") CSVReporter : public BenchmarkReporter { public: CSVReporter() : printed_header_(false) {} bool ReportContext(const Context& context) override; void ReportRuns(const std::vector& reports) override; private: void PrintRunData(const Run& run); bool printed_header_; std::set user_counter_names_; }; inline const char* GetTimeUnitString(TimeUnit unit) { switch (unit) { case kSecond: return "s"; case kMillisecond: return "ms"; case kMicrosecond: return "us"; case kNanosecond: return "ns"; } BENCHMARK_UNREACHABLE(); } inline double GetTimeUnitMultiplier(TimeUnit unit) { switch (unit) { case kSecond: return 1; case kMillisecond: return 1e3; case kMicrosecond: return 1e6; case kNanosecond: return 1e9; } BENCHMARK_UNREACHABLE(); } } // namespace benchmark #if defined(_MSC_VER) #pragma warning(pop) #endif #endif // BENCHMARK_REPORTER_H_ ================================================ FILE: include/benchmark/state.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_STATE_H_ #define BENCHMARK_STATE_H_ #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4251 4324) #endif #include #include #include #include "benchmark/counter.h" #include "benchmark/macros.h" #include "benchmark/statistics.h" #include "benchmark/types.h" namespace benchmark { namespace internal { class BenchmarkInstance; class ThreadTimer; class ThreadManager; class PerfCountersMeasurement; } // namespace internal class ProfilerManager; class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State { public: struct StateIterator; friend struct StateIterator; inline BENCHMARK_ALWAYS_INLINE StateIterator begin(); inline BENCHMARK_ALWAYS_INLINE StateIterator end(); inline bool KeepRunning(); inline bool KeepRunningBatch(IterationCount n); void PauseTiming(); void ResumeTiming(); void SkipWithMessage(const std::string& msg); void SkipWithError(const std::string& msg); bool skipped() const { return internal::NotSkipped != skipped_; } bool error_occurred() const { return internal::SkippedWithError == skipped_; } void SetIterationTime(double seconds); BENCHMARK_ALWAYS_INLINE void SetBytesProcessed(int64_t bytes) { counters["bytes_per_second"] = Counter(static_cast(bytes), Counter::kIsRate, Counter::kIs1024); } BENCHMARK_ALWAYS_INLINE int64_t bytes_processed() const { if (counters.find("bytes_per_second") != counters.end()) return static_cast(counters.at("bytes_per_second")); return 0; } BENCHMARK_ALWAYS_INLINE void SetComplexityN(ComplexityN complexity_n) { complexity_n_ = complexity_n; } BENCHMARK_ALWAYS_INLINE ComplexityN complexity_length_n() const { return complexity_n_; } BENCHMARK_ALWAYS_INLINE void SetItemsProcessed(int64_t items) { counters["items_per_second"] = Counter(static_cast(items), benchmark::Counter::kIsRate); } BENCHMARK_ALWAYS_INLINE int64_t items_processed() const { if (counters.find("items_per_second") != counters.end()) return static_cast(counters.at("items_per_second")); return 0; } void SetLabel(const 
std::string& label); BENCHMARK_ALWAYS_INLINE int64_t range(std::size_t pos = 0) const { assert(range_.size() > pos); return range_[pos]; } BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead") int64_t range_x() const { return range(0); } BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") int64_t range_y() const { return range(1); } BENCHMARK_ALWAYS_INLINE int threads() const { return threads_; } BENCHMARK_ALWAYS_INLINE int thread_index() const { return thread_index_; } BENCHMARK_ALWAYS_INLINE IterationCount iterations() const { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { return 0; } return max_iterations - total_iterations_ + batch_leftover_; } BENCHMARK_ALWAYS_INLINE std::string name() const { return name_; } size_t range_size() const { return range_.size(); } private: IterationCount total_iterations_; IterationCount batch_leftover_; public: const IterationCount max_iterations; private: bool started_; bool finished_; internal::Skipped skipped_; std::vector range_; ComplexityN complexity_n_; public: UserCounters counters; private: State(std::string name, IterationCount max_iters, const std::vector& ranges, int thread_i, int n_threads, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement, ProfilerManager* profiler_manager); void StartKeepRunning(); inline bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); const std::string name_; const int thread_index_; const int threads_; internal::ThreadTimer* const timer_; internal::ThreadManager* const manager_; internal::PerfCountersMeasurement* const perf_counters_measurement_; ProfilerManager* const profiler_manager_; friend class internal::BenchmarkInstance; }; inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { return KeepRunningInternal(1, /*is_batch=*/false); } inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) { return KeepRunningInternal(n, /*is_batch=*/true); } inline 
BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n, bool is_batch) { assert(n > 0); assert(is_batch || n == 1); if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) { total_iterations_ -= n; return true; } if (!started_) { StartKeepRunning(); if (!skipped() && total_iterations_ >= n) { total_iterations_ -= n; return true; } } if (is_batch && total_iterations_ != 0) { batch_leftover_ = n - total_iterations_; total_iterations_ = 0; return true; } FinishKeepRunning(); return false; } struct State::StateIterator { struct BENCHMARK_UNUSED Value {}; typedef std::forward_iterator_tag iterator_category; typedef Value value_type; typedef Value reference; typedef Value pointer; typedef std::ptrdiff_t difference_type; private: friend class State; BENCHMARK_ALWAYS_INLINE StateIterator() : cached_(0), parent_() {} BENCHMARK_ALWAYS_INLINE explicit StateIterator(State* st) : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {} public: BENCHMARK_ALWAYS_INLINE Value operator*() const { return Value(); } BENCHMARK_ALWAYS_INLINE StateIterator& operator++() { assert(cached_ > 0); --cached_; return *this; } BENCHMARK_ALWAYS_INLINE bool operator!=(StateIterator const&) const { if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true; parent_->FinishKeepRunning(); return false; } private: IterationCount cached_; State* const parent_; }; inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() { return StateIterator(this); } inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() { StartKeepRunning(); return StateIterator(); } } // namespace benchmark #if defined(_MSC_VER) #pragma warning(pop) #endif #endif // BENCHMARK_STATE_H_ ================================================ FILE: include/benchmark/statistics.h ================================================ // Copyright 2015 Google Inc. All rights reserved. 
namespace benchmark {

// Asymptotic complexity selector. oLambda is paired with a user-supplied
// BigOFunc.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

typedef int64_t ComplexityN;

// Unit in which a computed statistic is expressed.
enum StatisticUnit { kTime, kPercentage };

// Signature of a custom complexity function.
typedef double(BigOFunc)(ComplexityN);

// Signature of a custom statistic over a series of samples. The element type
// of the vector had been lost from the declaration and is restored here.
typedef double(StatisticsFunc)(const std::vector<double>&);

namespace internal {

// A named statistic: the function that computes it and the unit of its result.
struct Statistics {
  std::string name_;
  StatisticsFunc* compute_;
  StatisticUnit unit_;

  Statistics(const std::string& name, StatisticsFunc* compute,
             StatisticUnit unit = kTime)
      : name_(name), compute_(compute), unit_(unit) {}
};

// Bit flags; ARM_ReportAggregatesOnly is the union of the file and display
// variants.
enum AggregationReportMode : unsigned {
  ARM_Unspecified = 0,
  ARM_Default = 1U << 0U,
  ARM_FileReportAggregatesOnly = 1U << 1U,
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};

enum Skipped : unsigned {
  NotSkipped = 0,
  SkippedWithMessage,
  SkippedWithError
};

}  // namespace internal

}  // namespace benchmark
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef BENCHMARK_SYSINFO_H_ #define BENCHMARK_SYSINFO_H_ #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4251) #endif #include #include #include "benchmark/macros.h" namespace benchmark { struct BENCHMARK_EXPORT CPUInfo { struct CacheInfo { std::string type; int level; int size; int num_sharing; }; enum Scaling { UNKNOWN, ENABLED, DISABLED }; int num_cpus; Scaling scaling; double cycles_per_second; std::vector caches; std::vector load_avg; static const CPUInfo& Get(); private: CPUInfo(); BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo); }; struct BENCHMARK_EXPORT SystemInfo { enum class ASLR { UNKNOWN, ENABLED, DISABLED }; std::string name; ASLR ASLRStatus; static const SystemInfo& Get(); private: SystemInfo(); BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo); }; } // namespace benchmark #if defined(_MSC_VER) #pragma warning(pop) #endif #endif // BENCHMARK_SYSINFO_H_ ================================================ FILE: include/benchmark/types.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
namespace benchmark {

namespace internal {
#if (__cplusplus < 201402L || (defined(_MSC_VER) && _MSVC_LANG < 201402L))
// Fallback for toolchains without std::make_unique (pre-C++14): constructs a
// T from the forwarded arguments and wraps it in a std::unique_ptr<T>.
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
#else
using ::std::make_unique;
#endif
}  // namespace internal

class BenchmarkReporter;
class State;

using IterationCount = int64_t;

// Callback type taking the benchmark State by const reference.
using callback_function = std::function<void(const benchmark::State&)>;

enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };

}  // namespace benchmark
#ifndef BENCHMARK_UTILS_H_ #define BENCHMARK_UTILS_H_ #include #include #include #include "benchmark/macros.h" namespace benchmark { namespace internal { BENCHMARK_EXPORT void UseCharPointer(char const volatile*); } #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \ defined(__EMSCRIPTEN__) #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY #endif inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { std::atomic_signal_fence(std::memory_order_acq_rel); } #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY #if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER) template BENCHMARK_DEPRECATED_MSG( "The const-ref version of this method can permit " "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { asm volatile("" : : "r,m"(value) : "memory"); } template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { #if defined(__clang__) asm volatile("" : "+r,m"(value) : : "memory"); #else asm volatile("" : "+m,r"(value) : : "memory"); #endif } template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { #if defined(__clang__) asm volatile("" : "+r,m"(value) : : "memory"); #else asm volatile("" : "+m,r"(value) : : "memory"); #endif } #elif (__GNUC__ >= 5) template BENCHMARK_DEPRECATED_MSG( "The const-ref version of this method can permit " "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE typename std::enable_if::value && (sizeof(Tp) <= sizeof(Tp*))>::type DoNotOptimize(Tp const& value) { asm volatile("" : : "r,m"(value) : "memory"); } template BENCHMARK_DEPRECATED_MSG( "The const-ref version of this method can permit " "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE typename std::enable_if::value || (sizeof(Tp) > sizeof(Tp*))>::type DoNotOptimize(Tp const& value) { asm volatile("" : : "m"(value) : "memory"); } template inline BENCHMARK_ALWAYS_INLINE typename std::enable_if::value && (sizeof(Tp) <= 
sizeof(Tp*))>::type DoNotOptimize(Tp& value) { asm volatile("" : "+m,r"(value) : : "memory"); } template inline BENCHMARK_ALWAYS_INLINE typename std::enable_if::value || (sizeof(Tp) > sizeof(Tp*))>::type DoNotOptimize(Tp& value) { asm volatile("" : "+m"(value) : : "memory"); } template inline BENCHMARK_ALWAYS_INLINE typename std::enable_if::value && (sizeof(Tp) <= sizeof(Tp*))>::type DoNotOptimize(Tp&& value) { asm volatile("" : "+m,r"(value) : : "memory"); } template inline BENCHMARK_ALWAYS_INLINE typename std::enable_if::value || (sizeof(Tp) > sizeof(Tp*))>::type DoNotOptimize(Tp&& value) { asm volatile("" : "+m"(value) : : "memory"); } #endif #elif defined(_MSC_VER) template BENCHMARK_DEPRECATED_MSG( "The const-ref version of this method can permit " "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast(value)); _ReadWriteBarrier(); } template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { internal::UseCharPointer(&reinterpret_cast(value)); _ReadWriteBarrier(); } template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { internal::UseCharPointer(&reinterpret_cast(value)); _ReadWriteBarrier(); } #else template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { internal::UseCharPointer(&reinterpret_cast(value)); } #endif } // end namespace benchmark #endif // BENCHMARK_UTILS_H_ ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] name = "google_benchmark" description = "A library to benchmark code snippets." 
requires-python = ">=3.10" license = "Apache-2.0" keywords = ["benchmark"] authors = [{ name = "Google", email = "benchmark-discuss@googlegroups.com" }] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Testing", "Topic :: System :: Benchmark", ] dynamic = ["readme", "version"] [dependency-groups] dev = ["pre-commit>=3.3.3"] [project.urls] Homepage = "https://github.com/google/benchmark" Documentation = "https://github.com/google/benchmark/tree/main/docs" Repository = "https://github.com/google/benchmark.git" Discord = "https://discord.gg/cz7UX7wKC2" [tool.setuptools] package-dir = { "" = "bindings/python" } zip-safe = false [tool.setuptools.packages.find] where = ["bindings/python"] [tool.setuptools.dynamic] readme = { file = "README.md", content-type = "text/markdown" } version = { attr = "google_benchmark.__version__" } [tool.mypy] check_untyped_defs = true disallow_incomplete_defs = true pretty = true python_version = "3.11" strict_optional = false warn_unreachable = true [[tool.mypy.overrides]] module = ["yaml"] ignore_missing_imports = true [tool.ruff] # explicitly tell ruff the source directory to correctly identify first-party package. src = ["bindings/python"] line-length = 80 target-version = "py311" [tool.ruff.lint] # Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default. 
import contextlib
import os
import platform
import re
import shutil
import sys
from collections.abc import Generator
from pathlib import Path
from typing import Any

import setuptools
from setuptools.command import build_ext

IS_WINDOWS = platform.system() == "Windows"
IS_MAC = platform.system() == "Darwin"
IS_LINUX = platform.system() == "Linux"

# hardcoded SABI-related options. Requires that each Python interpreter
# (hermetic or not) participating is of the same major-minor version.
py_limited_api = sys.version_info >= (3, 12)
options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {}


def is_cibuildwheel() -> bool:
    """Return True when the ``CIBUILDWHEEL`` environment variable is set."""
    return os.getenv("CIBUILDWHEEL") is not None


@contextlib.contextmanager
def _maybe_patch_toolchains() -> Generator[None, None, None]:
    """
    Patch rules_python toolchains to ignore root user error
    when run in a Docker container on Linux in cibuildwheel.

    ``MODULE.bazel`` is rewritten for the duration of the ``with`` block and
    restored to its original content afterwards, even if the body raises.
    Outside of cibuildwheel-on-Linux this is a no-op and the file is not
    touched at all.
    """

    def fmt_toolchain_args(matchobj: re.Match[str]) -> str:
        """Append ``ignore_root_user_error = True`` to one toolchain call."""
        suffix = "ignore_root_user_error = True"
        callargs = matchobj.group(1)
        # toolchain def is broken over multiple lines
        if callargs.endswith("\n"):
            callargs = callargs + " " + suffix + ",\n"
        # toolchain def is on one line.
        else:
            callargs = callargs + ", " + suffix
        return "python.toolchain(" + callargs + ")"

    if not (is_cibuildwheel() and IS_LINUX):
        # Nothing to patch, so do not read (or depend on) MODULE.bazel.
        yield
        return

    module_bazel = Path("MODULE.bazel")
    content: str = module_bazel.read_text()
    try:
        module_bazel.write_text(
            re.sub(
                # The dot is escaped so that only a literal
                # `python.toolchain(` call is matched.
                r"python\.toolchain\(([\w\"\s,.=]*)\)",
                fmt_toolchain_args,
                content,
            )
        )
        yield
    finally:
        module_bazel.write_text(content)


class BazelExtension(setuptools.Extension):
    """A C/C++ extension that is defined as a Bazel BUILD target."""

    def __init__(self, name: str, bazel_target: str, **kwargs: Any):
        super().__init__(name=name, sources=[], **kwargs)

        self.bazel_target = bazel_target
        # "//pkg/path:target" -> relpath "pkg/path", target_name "target".
        stripped_target = bazel_target.split("//")[-1]
        self.relpath, self.target_name = stripped_target.split(":")


class BuildBazelExtension(build_ext.build_ext):
    """A command that runs Bazel to build a C/C++ extension."""

    def run(self) -> None:
        """Build every registered extension, then stop the Bazel server."""
        for ext in self.extensions:
            self.bazel_build(ext)
        # explicitly call `bazel shutdown` for graceful exit
        self.spawn(["bazel", "shutdown"])

    def copy_extensions_to_source(self) -> None:
        """
        Copy generated extensions into the source tree.
        This is done in the ``bazel_build`` method, so it's not necessary to
        do again in the `build_ext` base class.
        """

    def bazel_build(self, ext: BazelExtension) -> None:  # noqa: C901
        """Runs the bazel build to create the package."""
        temp_path = Path(self.build_temp)

        # We round to the minor version, which makes rules_python
        # look up the latest available patch version internally.
        python_version = f"{sys.version_info[0]}.{sys.version_info[1]}"

        bazel_argv = [
            "bazel",
            "run",
            ext.bazel_target,
            f"--symlink_prefix={temp_path / 'bazel-'}",
            f"--compilation_mode={'dbg' if self.debug else 'opt'}",
            # C++17 is required by nanobind
            f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}",
            f"--@rules_python//python/config_settings:python_version={python_version}",
        ]
        if ext.py_limited_api:
            bazel_argv += ["--@nanobind_bazel//:py-limited-api=cp312"]

        if IS_WINDOWS:
            # Link with python*.lib.
            bazel_argv.extend(
                "--linkopt=/LIBPATH:" + library_dir
                for library_dir in self.library_dirs
            )
        elif IS_MAC:
            # C++17 needs macOS 10.14 at minimum
            bazel_argv.append("--macos_minimum_os=10.14")

        with _maybe_patch_toolchains():
            self.spawn(bazel_argv)

        if IS_WINDOWS:
            suffix = ".pyd"
        else:
            suffix = ".abi3.so" if ext.py_limited_api else ".so"

        # copy the Bazel build artifacts into setuptools' libdir,
        # from where the wheel is built.
        pkgname = "google_benchmark"
        pythonroot = Path("bindings") / "python" / "google_benchmark"
        srcdir = temp_path / "bazel-bin" / pythonroot
        if not self.inplace:
            libdir = Path(self.build_lib) / pkgname
        else:
            build_py = self.get_finalized_command("build_py")
            libdir = Path(build_py.get_package_dir(pkgname))
        # Make sure the destination exists so the copies below cannot fail
        # merely because no earlier build step created it.
        libdir.mkdir(parents=True, exist_ok=True)

        for root, dirs, files in os.walk(srcdir, topdown=True):
            # exclude runfiles directories and children.
            dirs[:] = [d for d in dirs if "runfiles" not in d]
            root_path = Path(root)

            for f in files:
                fp = Path(f)
                # we do not want the bare .so file included
                # when building for ABI3, so we require a
                # full and exact match on the file extension.
                is_artifact = (
                    "".join(fp.suffixes) == suffix or fp.suffix == ".pyi"
                )
                # copy py.typed, but only at the package root.
                is_root_marker = root_path == srcdir and f == "py.typed"

                if is_artifact or is_root_marker:
                    shutil.copyfile(root_path / fp, libdir / fp)


setuptools.setup(
    cmdclass={"build_ext": BuildBazelExtension},
    package_data={"google_benchmark": ["py.typed", "*.pyi"]},
    ext_modules=[
        BazelExtension(
            name="google_benchmark._benchmark",
            bazel_target="//bindings/python/google_benchmark:benchmark_stubgen",
            py_limited_api=py_limited_api,
        )
    ],
    options=options,
)
endif(HAVE_LIB_RT) # We need extra libraries on Windows if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") target_link_libraries(benchmark PRIVATE shlwapi) endif() # We need extra libraries on Solaris if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") target_link_libraries(benchmark PRIVATE kstat) endif() if (NOT BUILD_SHARED_LIBS) target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE) endif() # Benchmark main library add_library(benchmark_main "benchmark_main.cc") add_library(benchmark::benchmark_main ALIAS benchmark_main) set_target_properties(benchmark_main PROPERTIES OUTPUT_NAME "benchmark_main" VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION} DEFINE_SYMBOL benchmark_EXPORTS ) target_link_libraries(benchmark_main PUBLIC benchmark::benchmark) set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") set(pkg_config_main "${generated_dir}/${PROJECT_NAME}_main.pc") set(targets_to_export benchmark benchmark_main) set(targets_export_name "${PROJECT_NAME}Targets") set(namespace "${PROJECT_NAME}::") include(CMakePackageConfigHelpers) configure_package_config_file ( ${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in ${project_config} INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} NO_SET_AND_CHECK_MACRO NO_CHECK_REQUIRED_COMPONENTS_MACRO ) write_basic_package_version_file( "${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion ) # Derive private link libraries from target if(NOT BUILD_SHARED_LIBS) get_target_property(LINK_LIBS benchmark LINK_LIBRARIES) if(LINK_LIBS) set(BENCHMARK_PRIVATE_LINK_LIBRARIES "") foreach(LIB IN LISTS LINK_LIBS) if(NOT TARGET "${LIB}" AND LIB MATCHES "^[a-zA-Z0-9_.-]+$") list(APPEND BENCHMARK_PRIVATE_LINK_LIBRARIES "-l${LIB}") endif() endforeach() string(JOIN " " BENCHMARK_PRIVATE_LINK_LIBRARIES 
${BENCHMARK_PRIVATE_LINK_LIBRARIES}) endif() endif() configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark_main.pc.in" "${pkg_config_main}" @ONLY) export ( TARGETS ${targets_to_export} NAMESPACE "${namespace}" FILE ${generated_dir}/${targets_export_name}.cmake ) if (BENCHMARK_ENABLE_INSTALL) # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) install( TARGETS ${targets_to_export} EXPORT ${targets_export_name} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) install( DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" "${PROJECT_BINARY_DIR}/include/benchmark" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.*h") install( FILES "${project_config}" "${version_config}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") install( FILES "${pkg_config}" "${pkg_config_main}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") install( EXPORT "${targets_export_name}" NAMESPACE "${namespace}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") endif() if (BENCHMARK_ENABLE_DOXYGEN) find_package(Doxygen REQUIRED) set(DOXYGEN_QUIET YES) set(DOXYGEN_RECURSIVE YES) set(DOXYGEN_GENERATE_HTML YES) set(DOXYGEN_GENERATE_MAN NO) set(DOXYGEN_MARKDOWN_SUPPORT YES) set(DOXYGEN_BUILTIN_STL_SUPPORT YES) set(DOXYGEN_EXTRACT_PACKAGE YES) set(DOXYGEN_EXTRACT_STATIC YES) set(DOXYGEN_SHOW_INCLUDE_FILES YES) set(DOXYGEN_BINARY_TOC YES) set(DOXYGEN_TOC_EXPAND YES) set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md") doxygen_add_docs(benchmark_doxygen docs include src ALL WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMENT "Building documentation with Doxygen.") if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) install( DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/" DESTINATION ${CMAKE_INSTALL_DOCDIR}) endif() else() if 
namespace benchmark {
namespace internal {
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example.  If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//

// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type: it returns a reference to a char array of N elements, so
// sizeof() of a call expression yields N. The template header had been lost
// from the declaration and is restored here.
template <typename T, size_t N>
char (&ArraySizeHelper(T (&array)[N]))[N];

// That gcc wants both of these prototypes seems mysterious. VC, for
// its part, can't decide which to use (another mystery). Matching of
// template overloads: the final frontier.
#ifndef COMPILER_MSVC
template <typename T, size_t N>
char (&ArraySizeHelper(const T (&array)[N]))[N];
#endif

#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array)))

}  // end namespace internal
}  // end namespace benchmark
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" #include "benchmark/types.h" #include "benchmark_api_internal.h" #include "benchmark_runner.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS #if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) && \ !defined(BENCHMARK_OS_WASI) #include #endif #include #include #endif #ifdef BENCHMARK_OS_LINUX #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "check.h" #include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" #include "counter.h" #include "log.h" #include "mutex.h" #include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" #include "thread_manager.h" #include "thread_timer.h" namespace benchmark { // Print a list of benchmarks. This option overrides all other options. BM_DEFINE_bool(benchmark_list_tests, false); // A regular expression that specifies the set of benchmarks to execute. If // this flag is empty, or if this flag is the string \"all\", all benchmarks // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); // Specification of how long to run the benchmark. // // It can be either an exact number of iterations (specified as `x`), // or a minimum number of seconds (specified as `s`). If the latter // format (ie., min seconds) is used, the system may run the benchmark longer // until the results are considered significant. 
// // For backward compatibility, the `s` suffix may be omitted, in which case, // the specified number is interpreted as the number of seconds. // // For cpu-time based tests, this is the lower bound // on the total cpu time used by all threads that make up the test. For // real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); // Minimum number of seconds a benchmark should be run before results should be // taken into account. This e.g can be necessary for benchmarks of code which // needs to fill some form of cache before performance is of interest. // Note: results gathered within this period are discarded and not used for // reported result. BM_DEFINE_double(benchmark_min_warmup_time, 0.0); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. BM_DEFINE_int32(benchmark_repetitions, 1); // If enabled, forces each benchmark to execute exactly one iteration and one // repetition, bypassing any configured // MinTime()/MinWarmUpTime()/Iterations()/Repetitions() BM_DEFINE_bool(benchmark_dry_run, false); // If set, enable random interleaving of repetitions of all benchmarks. // See http://github.com/google/benchmark/issues/1051 for details. BM_DEFINE_bool(benchmark_enable_random_interleaving, false); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for // repeated benchmarks. Affects all reporters. BM_DEFINE_bool(benchmark_report_aggregates_only, false); // Display the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are displayed for // repeated benchmarks. 
Unlike benchmark_report_aggregates_only, only affects // the display reporter, but *NOT* file reporter, which will still contain // all the output. BM_DEFINE_bool(benchmark_display_aggregates_only, false); // The format to use for console output. // Valid values are 'console', 'json', or 'csv'. BM_DEFINE_string(benchmark_format, "console"); // The format to use for file output. // Valid values are 'console', 'json', or 'csv'. BM_DEFINE_string(benchmark_out_format, "json"); // The file to write additional output to. BM_DEFINE_string(benchmark_out, ""); // Whether to use colors in the output. Valid values: // 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if // the output is being sent to a terminal and the TERM environment variable is // set to a terminal type that supports colors. BM_DEFINE_string(benchmark_color, "auto"); // Whether to use tabular format when printing user counters to the console. // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. BM_DEFINE_bool(benchmark_counters_tabular, false); // List of additional perf counters to collect, in libpfm format. For more // information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html BM_DEFINE_string(benchmark_perf_counters, ""); // Extra context to include in the output formatted as comma-separated key-value // pairs. Kept internal as it's only used for parsing from env/command line. 
BM_DEFINE_kvpairs(benchmark_context, {}); // Set the default time unit to use for reports // Valid values are 'ns', 'us', 'ms' or 's' BM_DEFINE_string(benchmark_time_unit, ""); // The level of verbose logging to output BM_DEFINE_int32(v, 0); namespace internal { // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) std::map* global_context = nullptr; BENCHMARK_EXPORT std::map*& GetGlobalContext() { return global_context; } namespace { // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) void const volatile* volatile global_force_escape_pointer; } // namespace // FIXME: Verify if LTO still messes this up? void UseCharPointer(char const volatile* const v) { // We want to escape the pointer `v` so that the compiler can not eliminate // computations that produced it. To do that, we escape the pointer by storing // it into a volatile variable, since generally, volatile store, is not // something the compiler is allowed to elide. global_force_escape_pointer = reinterpret_cast(v); } } // namespace internal State::State(std::string name, IterationCount max_iters, const std::vector& ranges, int thread_i, int n_threads, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement, ProfilerManager* profiler_manager) : total_iterations_(0), batch_leftover_(0), max_iterations(max_iters), started_(false), finished_(false), skipped_(internal::NotSkipped), range_(ranges), complexity_n_(0), name_(std::move(name)), thread_index_(thread_i), threads_(n_threads), timer_(timer), manager_(manager), perf_counters_measurement_(perf_counters_measurement), profiler_manager_(profiler_manager) { BM_CHECK(max_iterations != 0) << "At least one iteration must be run"; BM_CHECK_LT(thread_index_, threads_) << "thread_index must be less than threads"; // Add counters with correct flag now. 
If added with `counters[name]` in // `PauseTiming`, a new `Counter` will be inserted the first time, which // won't have the flag. Inserting them now also reduces the allocations // during the benchmark. if (perf_counters_measurement_ != nullptr) { for (const std::string& counter_name : perf_counters_measurement_->names()) { counters[counter_name] = Counter(0.0, Counter::kAvgIterations); } } // Note: The use of offsetof below is technically undefined until C++17 // because State is not a standard layout type. However, all compilers // currently provide well-defined behavior as an extension (which is // demonstrated since constexpr evaluation must diagnose all undefined // behavior). However, GCC and Clang also warn about this use of offsetof, // which must be suppressed. #if defined(__INTEL_COMPILER) #pragma warning push #pragma warning(disable : 1875) #elif defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winvalid-offsetof" #endif #if defined(__NVCC__) #pragma nv_diagnostic push #pragma nv_diag_suppress 1427 #endif #if defined(__NVCOMPILER) #pragma diagnostic push #pragma diag_suppress offset_in_non_POD_nonstandard #endif // Offset tests to ensure commonly accessed data is on the first cache line. 
const int cache_line_size = 64; static_assert( offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), ""); #if defined(__INTEL_COMPILER) #pragma warning pop #elif defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif #if defined(__NVCC__) #pragma nv_diagnostic pop #endif #if defined(__NVCOMPILER) #pragma diagnostic pop #endif } void State::PauseTiming() { // Add in time accumulated so far BM_CHECK(started_ && !finished_ && !skipped()); timer_->StopTimer(); if (perf_counters_measurement_ != nullptr) { std::vector> measurements; if (!perf_counters_measurement_->Stop(measurements)) { BM_CHECK(false) << "Perf counters read the value failed."; } for (const auto& name_and_measurement : measurements) { const std::string& name = name_and_measurement.first; const double measurement = name_and_measurement.second; // Counter was inserted with `kAvgIterations` flag by the constructor. assert(counters.find(name) != counters.end()); counters[name].value += measurement; } } } void State::ResumeTiming() { BM_CHECK(started_ && !finished_ && !skipped()); timer_->StartTimer(); if (perf_counters_measurement_ != nullptr) { perf_counters_measurement_->Start(); } } void State::SkipWithMessage(const std::string& msg) { skipped_ = internal::SkippedWithMessage; { MutexLock l(manager_->GetBenchmarkMutex()); if (internal::NotSkipped == manager_->results.skipped_) { manager_->results.skip_message_ = msg; manager_->results.skipped_ = skipped_; } } total_iterations_ = 0; if (timer_->running()) { timer_->StopTimer(); } } void State::SkipWithError(const std::string& msg) { skipped_ = internal::SkippedWithError; { MutexLock l(manager_->GetBenchmarkMutex()); if (internal::NotSkipped == manager_->results.skipped_) { manager_->results.skip_message_ = msg; manager_->results.skipped_ = skipped_; } } total_iterations_ = 0; if (timer_->running()) { timer_->StopTimer(); } } void State::SetIterationTime(double seconds) { timer_->SetIterationTime(seconds); } void 
State::SetLabel(const std::string& label) { MutexLock l(manager_->GetBenchmarkMutex()); manager_->results.report_label_ = label; } void State::StartKeepRunning() { BM_CHECK(!started_ && !finished_); started_ = true; total_iterations_ = skipped() ? 0 : max_iterations; if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false)) { profiler_manager_->AfterSetupStart(); } manager_->StartStopBarrier(); if (!skipped()) { ResumeTiming(); } } void State::FinishKeepRunning() { BM_CHECK(started_ && (!finished_ || skipped())); if (!skipped()) { PauseTiming(); } // Total iterations has now wrapped around past 0. Fix this. total_iterations_ = 0; finished_ = true; manager_->StartStopBarrier(); if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false)) { profiler_manager_->BeforeTeardownStop(); } } namespace internal { namespace { // Flushes streams after invoking reporter methods that write to them. This // ensures users get timely updates even when streams are not line-buffered. void FlushStreams(BenchmarkReporter* reporter) { if (reporter == nullptr) { return; } std::flush(reporter->GetOutputStream()); std::flush(reporter->GetErrorStream()); } // Reports in both display and file reporters. void Report(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter, const RunResults& run_results) { auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only, const RunResults& results) { assert(reporter); // If there are no aggregates, do output non-aggregates. 
aggregates_only &= !results.aggregates_only.empty(); if (!aggregates_only) { reporter->ReportRuns(results.non_aggregates); } if (!results.aggregates_only.empty()) { reporter->ReportRuns(results.aggregates_only); } }; report_one(display_reporter, run_results.display_report_aggregates_only, run_results); if (file_reporter != nullptr) { report_one(file_reporter, run_results.file_report_aggregates_only, run_results); } FlushStreams(display_reporter); FlushStreams(file_reporter); } void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { // Note the file_reporter can be null. BM_CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; size_t name_field_width = 10; size_t stat_field_width = 0; for (const BenchmarkInstance& benchmark : benchmarks) { name_field_width = std::max(name_field_width, benchmark.name().str().size()); might_have_aggregates |= benchmark.repetitions() > 1; for (const auto& Stat : benchmark.statistics()) { stat_field_width = std::max(stat_field_width, Stat.name_.size()); } } if (might_have_aggregates) { name_field_width += 1 + stat_field_width; } // Print header here BenchmarkReporter::Context context; context.name_field_width = name_field_width; // Keep track of running times of all instances of each benchmark family. std::map per_family_reports; if (display_reporter->ReportContext(context) && ((file_reporter == nullptr) || file_reporter->ReportContext(context))) { FlushStreams(display_reporter); FlushStreams(file_reporter); size_t num_repetitions_total = 0; // This perfcounters object needs to be created before the runners vector // below so it outlasts their lifetime. 
PerfCountersMeasurement perfcounters( StrSplit(FLAGS_benchmark_perf_counters, ',')); // Vector of benchmarks to run std::vector runners; runners.reserve(benchmarks.size()); // Count the number of benchmarks with threads to warn the user in case // performance counters are used. int benchmarks_with_threads = 0; // Loop through all benchmarks for (const BenchmarkInstance& benchmark : benchmarks) { BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; if (benchmark.complexity() != oNone) { reports_for_family = &per_family_reports[benchmark.family_index()]; } benchmarks_with_threads += static_cast(benchmark.threads() > 1); runners.emplace_back(benchmark, &perfcounters, reports_for_family); int num_repeats_of_this_instance = runners.back().GetNumRepeats(); num_repetitions_total += static_cast(num_repeats_of_this_instance); if (reports_for_family != nullptr) { reports_for_family->num_runs_total += num_repeats_of_this_instance; } } assert(runners.size() == benchmarks.size() && "Unexpected runner count."); // The use of performance counters with threads would be unintuitive for // the average user so we need to warn them about this case if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) { GetErrorLogInstance() << "***WARNING*** There are " << benchmarks_with_threads << " benchmarks with threads and " << perfcounters.num_counters() << " performance counters were requested. 
Beware counters will " "reflect the combined usage across all " "threads.\n"; } std::vector repetition_indices; repetition_indices.reserve(num_repetitions_total); for (size_t runner_index = 0, num_runners = runners.size(); runner_index != num_runners; ++runner_index) { const internal::BenchmarkRunner& runner = runners[runner_index]; std::fill_n(std::back_inserter(repetition_indices), runner.GetNumRepeats(), runner_index); } assert(repetition_indices.size() == num_repetitions_total && "Unexpected number of repetition indexes."); if (FLAGS_benchmark_enable_random_interleaving) { std::random_device rd; std::mt19937 g(rd()); std::shuffle(repetition_indices.begin(), repetition_indices.end(), g); } for (size_t repetition_index : repetition_indices) { internal::BenchmarkRunner& runner = runners[repetition_index]; runner.DoOneRepetition(); if (runner.HasRepeatsRemaining()) { continue; } // FIXME: report each repetition separately, not all of them in bulk. display_reporter->ReportRunsConfig( runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); if (file_reporter != nullptr) { file_reporter->ReportRunsConfig( runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); } RunResults run_results = runner.GetResults(); // Maybe calculate complexity report if (const auto* reports_for_family = runner.GetReportsForFamily()) { if (reports_for_family->num_runs_done == reports_for_family->num_runs_total) { auto additional_run_stats = ComputeBigO(reports_for_family->Runs); run_results.aggregates_only.insert(run_results.aggregates_only.end(), additional_run_stats.begin(), additional_run_stats.end()); per_family_reports.erase( static_cast(reports_for_family->Runs.front().family_index)); } } Report(display_reporter, file_reporter, run_results); } } display_reporter->Finalize(); if (file_reporter != nullptr) { file_reporter->Finalize(); } FlushStreams(display_reporter); FlushStreams(file_reporter); } // Disable deprecated warnings temporarily because we need to 
reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations BENCHMARK_DISABLE_DEPRECATED_WARNING std::unique_ptr CreateReporter( std::string const& name, ConsoleReporter::OutputOptions output_opts) { typedef std::unique_ptr PtrType; if (name == "console") { return PtrType(new ConsoleReporter(output_opts)); } if (name == "json") { return PtrType(new JSONReporter()); } if (name == "csv") { return PtrType(new CSVReporter()); } std::cerr << "Unexpected format: '" << name << "'\n"; std::flush(std::cerr); std::exit(1); } BENCHMARK_RESTORE_DEPRECATED_WARNING } // end namespace bool IsZero(double n) { return std::abs(n) < std::numeric_limits::epsilon(); } ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { int output_opts = ConsoleReporter::OO_Defaults; auto is_benchmark_color = [force_no_color]() -> bool { if (force_no_color) { return false; } if (FLAGS_benchmark_color == "auto") { return IsColorTerminal(); } return IsTruthyFlagValue(FLAGS_benchmark_color); }; if (is_benchmark_color()) { output_opts |= ConsoleReporter::OO_Color; } else { output_opts &= ~ConsoleReporter::OO_Color; } if (FLAGS_benchmark_counters_tabular) { output_opts |= ConsoleReporter::OO_Tabular; } else { output_opts &= ~ConsoleReporter::OO_Tabular; } return static_cast(output_opts); } } // end namespace internal BenchmarkReporter* CreateDefaultDisplayReporter() { static auto* default_display_reporter = internal::CreateReporter(FLAGS_benchmark_format, internal::GetOutputOptions()) .release(); return default_display_reporter; } size_t RunSpecifiedBenchmarks() { return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter); } size_t RunSpecifiedBenchmarks(std::string spec) { return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) { return RunSpecifiedBenchmarks(display_reporter, nullptr, FLAGS_benchmark_filter); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* 
display_reporter, std::string spec) { return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { return RunSpecifiedBenchmarks(display_reporter, file_reporter, FLAGS_benchmark_filter); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter, std::string spec) { if (spec.empty() || spec == "all") { spec = "."; // Regexp that matches all benchmarks } // Setup the reporters std::ofstream output_file; std::unique_ptr default_display_reporter; std::unique_ptr default_file_reporter; if (display_reporter == nullptr) { default_display_reporter.reset(CreateDefaultDisplayReporter()); display_reporter = default_display_reporter.get(); } auto& Out = display_reporter->GetOutputStream(); auto& Err = display_reporter->GetErrorStream(); std::string const& fname = FLAGS_benchmark_out; if (fname.empty() && (file_reporter != nullptr)) { Err << "A custom file reporter was provided but " "--benchmark_out= was not specified.\n"; Out.flush(); Err.flush(); std::exit(1); } if (!fname.empty()) { output_file.open(fname); if (!output_file.is_open()) { Err << "invalid file name: '" << fname << "'\n"; Out.flush(); Err.flush(); std::exit(1); } if (file_reporter == nullptr) { default_file_reporter = internal::CreateReporter( FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular ? 
ConsoleReporter::OO_Tabular : ConsoleReporter::OO_None); file_reporter = default_file_reporter.get(); } file_reporter->SetOutputStream(&output_file); file_reporter->SetErrorStream(&output_file); } std::vector benchmarks; if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) { Out.flush(); Err.flush(); return 0; } if (benchmarks.empty()) { Err << "Failed to match any benchmarks against regex: " << spec << "\n"; Out.flush(); Err.flush(); return 0; } if (FLAGS_benchmark_list_tests) { for (auto const& benchmark : benchmarks) { Out << benchmark.name().str() << "\n"; } } else { internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); } Out.flush(); Err.flush(); return benchmarks.size(); } namespace { // stores the time unit benchmarks use by default TimeUnit default_time_unit = kNanosecond; } // namespace TimeUnit GetDefaultTimeUnit() { return default_time_unit; } void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; } std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; } void SetBenchmarkFilter(std::string value) { FLAGS_benchmark_filter = std::move(value); } int32_t GetBenchmarkVerbosity() { return FLAGS_v; } void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } void RegisterProfilerManager(ProfilerManager* manager) { // Don't allow overwriting an existing manager. 
if (manager != nullptr) { BM_CHECK_EQ(internal::profiler_manager, nullptr); } internal::profiler_manager = manager; } void AddCustomContext(std::string key, std::string value) { if (internal::global_context == nullptr) { internal::global_context = new std::map(); } if (!internal::global_context->emplace(std::move(key), std::move(value)) .second) { std::cerr << "Failed to add custom context \"" << key << "\" as it already " << "exists with value \"" << value << "\"\n"; } } namespace internal { void (*HelperPrintf)(); namespace { void PrintUsageAndExit() { HelperPrintf(); std::flush(std::cout); std::flush(std::cerr); std::exit(0); } void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) { if (time_unit_flag == "s") { return SetDefaultTimeUnit(kSecond); } if (time_unit_flag == "ms") { return SetDefaultTimeUnit(kMillisecond); } if (time_unit_flag == "us") { return SetDefaultTimeUnit(kMicrosecond); } if (time_unit_flag == "ns") { return SetDefaultTimeUnit(kNanosecond); } if (!time_unit_flag.empty()) { PrintUsageAndExit(); } } void ParseCommandLineFlags(int* argc, char** argv) { using namespace benchmark; BenchmarkReporter::Context::executable_name = ((argc != nullptr) && *argc > 0) ? 
argv[0] : "unknown"; for (int i = 1; (argc != nullptr) && i < *argc; ++i) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_dry_run", &FLAGS_benchmark_dry_run) || ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", &FLAGS_benchmark_enable_random_interleaving) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", &FLAGS_benchmark_display_aggregates_only) || ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) || ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || ParseStringFlag(argv[i], "benchmark_out_format", &FLAGS_benchmark_out_format) || ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || ParseBoolFlag(argv[i], "benchmark_counters_tabular", &FLAGS_benchmark_counters_tabular) || ParseStringFlag(argv[i], "benchmark_perf_counters", &FLAGS_benchmark_perf_counters) || ParseKeyValueFlag(argv[i], "benchmark_context", &FLAGS_benchmark_context) || ParseStringFlag(argv[i], "benchmark_time_unit", &FLAGS_benchmark_time_unit) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { for (int j = i; j != *argc - 1; ++j) { argv[j] = argv[j + 1]; } --(*argc); --i; } else if (IsFlag(argv[i], "help")) { PrintUsageAndExit(); } } for (auto const* flag : {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) { if (*flag != "console" && *flag != "json" && *flag != "csv") { PrintUsageAndExit(); } } SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit); if (FLAGS_benchmark_color.empty()) { PrintUsageAndExit(); } if (FLAGS_benchmark_dry_run) 
{ AddCustomContext("dry_run", "true"); } for (const auto& kv : FLAGS_benchmark_context) { AddCustomContext(kv.first, kv.second); } } } // end namespace int InitializeStreams() { static std::ios_base::Init init; return 0; } template std::make_unsigned_t get_as_unsigned(T v) { using UnsignedT = std::make_unsigned_t; return static_cast(v); } } // end namespace internal void MaybeReenterWithoutASLR(int /*argc*/, char** argv) { // On e.g. Hexagon simulator, argv may be NULL. if (!argv) return; #ifdef BENCHMARK_OS_LINUX const auto curr_personality = personality(0xffffffff); // We should never fail to read-only query the current personality, // but let's be cautious. if (curr_personality == -1) return; // If ASLR is already disabled, we have nothing more to do. if (internal::get_as_unsigned(curr_personality) & ADDR_NO_RANDOMIZE) return; // Try to change the personality to disable ASLR. const auto proposed_personality = internal::get_as_unsigned(curr_personality) | ADDR_NO_RANDOMIZE; const auto prev_personality = personality(proposed_personality); // Have we failed to change the personality? That may happen. if (prev_personality == -1) return; // Make sure the parsona has been updated with the no-ASLR flag, // otherwise we will try to reenter infinitely. // This seems impossible, but can happen in some docker configurations. const auto new_personality = personality(0xffffffff); if ((internal::get_as_unsigned(new_personality) & ADDR_NO_RANDOMIZE) == 0) return; execv(argv[0], argv); // The exec() functions return only if an error has occurred, // in which case we want to just continue as-is. 
#else return; #endif } std::string GetBenchmarkVersion() { #ifdef BENCHMARK_VERSION return {BENCHMARK_VERSION}; #else return {""}; #endif } void PrintDefaultHelp() { fprintf(stdout, "benchmark" " [--benchmark_list_tests={true|false}]\n" " [--benchmark_filter=]\n" " [--benchmark_min_time=`x` OR `s` ]\n" " [--benchmark_min_warmup_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_dry_run={true|false}]\n" " [--benchmark_enable_random_interleaving={true|false}]\n" " [--benchmark_report_aggregates_only={true|false}]\n" " [--benchmark_display_aggregates_only={true|false}]\n" " [--benchmark_format=]\n" " [--benchmark_out=]\n" " [--benchmark_out_format=]\n" " [--benchmark_color={auto|true|false}]\n" " [--benchmark_counters_tabular={true|false}]\n" #if defined HAVE_LIBPFM " [--benchmark_perf_counters=,...]\n" #endif " [--benchmark_context==,...]\n" " [--benchmark_time_unit={ns|us|ms|s}]\n" " [--v=]\n"); } void Initialize(int* argc, char** argv, void (*HelperPrintf)()) { internal::HelperPrintf = HelperPrintf; internal::ParseCommandLineFlags(argc, argv); internal::LogLevel() = FLAGS_v; } void Shutdown() { delete internal::global_context; } bool ReportUnrecognizedArguments(int argc, char** argv) { for (int i = 1; i < argc; ++i) { fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], argv[i]); } return argc > 1; } } // end namespace benchmark ================================================ FILE: src/benchmark_api_internal.cc ================================================ #include "benchmark_api_internal.h" #include #include "string_util.h" namespace benchmark { namespace internal { BenchmarkInstance::BenchmarkInstance(benchmark::Benchmark* benchmark, int family_idx, int per_family_instance_idx, const std::vector& args, int thread_count) : benchmark_(*benchmark), family_index_(family_idx), per_family_instance_index_(per_family_instance_idx), aggregation_report_mode_(benchmark_.aggregation_report_mode_), args_(args), 
time_unit_(benchmark_.GetTimeUnit()), measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), use_real_time_(benchmark_.use_real_time_), use_manual_time_(benchmark_.use_manual_time_), complexity_(benchmark_.complexity_), complexity_lambda_(benchmark_.complexity_lambda_), statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), min_time_(benchmark_.min_time_), min_warmup_time_(benchmark_.min_warmup_time_), iterations_(benchmark_.iterations_), threads_(thread_count), setup_(benchmark_.setup_), teardown_(benchmark_.teardown_) { name_.function_name = benchmark_.name_; size_t arg_i = 0; for (const auto& arg : args) { if (!name_.args.empty()) { name_.args += '/'; } if (arg_i < benchmark->arg_names_.size()) { const auto& arg_name = benchmark_.arg_names_[arg_i]; if (!arg_name.empty()) { name_.args += StrFormat("%s:", arg_name.c_str()); } } name_.args += StrFormat("%" PRId64, arg); ++arg_i; } if (!IsZero(benchmark->min_time_)) { name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); } if (!IsZero(benchmark->min_warmup_time_)) { name_.min_warmup_time = StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); } if (benchmark_.iterations_ != 0) { name_.iterations = StrFormat( "iterations:%lu", static_cast(benchmark_.iterations_)); } if (benchmark_.repetitions_ != 0) { name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_); } if (benchmark_.measure_process_cpu_time_) { name_.time_type = "process_time"; } if (benchmark_.use_manual_time_) { if (!name_.time_type.empty()) { name_.time_type += '/'; } name_.time_type += "manual_time"; } else if (benchmark_.use_real_time_) { if (!name_.time_type.empty()) { name_.time_type += '/'; } name_.time_type += "real_time"; } if (!benchmark_.thread_counts_.empty()) { name_.threads = StrFormat("threads:%d", threads_); } } State BenchmarkInstance::Run( IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* 
perf_counters_measurement, ProfilerManager* profiler_manager) const { State st(name_.function_name, iters, args_, thread_id, threads_, timer, manager, perf_counters_measurement, profiler_manager); benchmark_.Run(st); return st; } void BenchmarkInstance::Setup() const { if (setup_ != nullptr) { State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, nullptr, nullptr); setup_(st); } } void BenchmarkInstance::Teardown() const { if (teardown_ != nullptr) { State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, nullptr, nullptr); teardown_(st); } } } // namespace internal } // namespace benchmark ================================================ FILE: src/benchmark_api_internal.h ================================================ #ifndef BENCHMARK_API_INTERNAL_H #define BENCHMARK_API_INTERNAL_H #include #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/reporter.h" #include "benchmark/sysinfo.h" #include "commandlineflags.h" namespace benchmark { namespace internal { // Information kept per benchmark we may want to run class BenchmarkInstance { public: BenchmarkInstance(benchmark::Benchmark* benchmark, int family_idx, int per_family_instance_idx, const std::vector& args, int thread_count); const BenchmarkName& name() const { return name_; } int family_index() const { return family_index_; } int per_family_instance_index() const { return per_family_instance_index_; } AggregationReportMode aggregation_report_mode() const { return aggregation_report_mode_; } TimeUnit time_unit() const { return time_unit_; } bool measure_process_cpu_time() const { return measure_process_cpu_time_; } bool use_real_time() const { return use_real_time_; } bool use_manual_time() const { return use_manual_time_; } BigO complexity() const { return complexity_; } BigOFunc* complexity_lambda() const { return complexity_lambda_; } const std::vector& statistics() const { 
return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } double min_warmup_time() const { return min_warmup_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } void Setup() const; void Teardown() const; const auto& GetUserThreadRunnerFactory() const { return benchmark_.threadrunner_; } State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement, ProfilerManager* profiler_manager) const; private: BenchmarkName name_; benchmark::Benchmark& benchmark_; const int family_index_; const int per_family_instance_index_; AggregationReportMode aggregation_report_mode_; const std::vector& args_; TimeUnit time_unit_; bool measure_process_cpu_time_; bool use_real_time_; bool use_manual_time_; BigO complexity_; BigOFunc* complexity_lambda_; UserCounters counters_; const std::vector& statistics_; int repetitions_; double min_time_; double min_warmup_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us callback_function setup_; callback_function teardown_; }; bool FindBenchmarksInternal(const std::string& re, std::vector* benchmarks, std::ostream* Err); bool IsZero(double n); BENCHMARK_EXPORT ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); } // end namespace internal } // end namespace benchmark #endif // BENCHMARK_API_INTERNAL_H ================================================ FILE: src/benchmark_main.cc ================================================ // Copyright 2018 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// Compute the total size of a pack of std::strings.
// Base case: an empty pack contributes nothing.
size_t size_impl() { return 0; }

// Recursive case: size of the first element plus the size of the remainder.
// Each element only needs a `size()` member.
template <typename Head, typename... Tail>
size_t size_impl(const Head& head, const Tail&... tail) {
  return head.size() + size_impl(tail...);
}

// Join a pack of std::strings using a delimiter
// TODO(dominic): use absl::StrJoin
// Base case: nothing left to append.
void join_impl(std::string& /*unused*/, char /*unused*/) {}

// Appends `head` and then, recursively, every element of `tail` to `s`.
template <typename Head, typename... Tail>
void join_impl(std::string& s, const char delimiter, const Head& head,
               const Tail&... tail) {
  // The delimiter is only written between two non-empty pieces, so empty
  // elements never produce leading, trailing or doubled delimiters.
  if (!s.empty() && !head.empty()) {
    s += delimiter;
  }

  s += head;

  join_impl(s, delimiter, tail...);
}

// Returns the non-empty elements of `ts` separated by `delimiter`.
// An empty pack, or a pack of empty strings, yields an empty string.
template <typename... Ts>
std::string join(char delimiter, const Ts&... ts) {
  std::string s;
  // Upper bound on the result length: every character of every element plus
  // at most one delimiter per element.
  s.reserve(sizeof...(Ts) + size_impl(ts...));
  join_impl(s, delimiter, ts...);
  return s;
}
constexpr int kRangeMultiplier = 8; // The size of a benchmark family determines is the number of inputs to repeat // the benchmark on. If this is "large" then warn the user during configuration. constexpr size_t kMaxFamilySize = 100; constexpr char kDisabledPrefix[] = "DISABLED_"; } // end namespace namespace internal { //=============================================================================// // BenchmarkFamilies //=============================================================================// // Class for managing registered benchmarks. Note that each registered // benchmark identifies a family of related benchmarks to run. class BenchmarkFamilies { public: static BenchmarkFamilies* GetInstance(); // Registers a benchmark family and returns the index assigned to it. size_t AddBenchmark(std::unique_ptr family); // Clear all registered benchmark families. void ClearBenchmarks(); // Extract the list of benchmark instances that match the specified // regular expression. bool FindBenchmarks(std::string spec, std::vector* benchmarks, std::ostream* Err); private: BenchmarkFamilies() {} std::vector> families_; Mutex mutex_; }; BenchmarkFamilies* BenchmarkFamilies::GetInstance() { static BenchmarkFamilies instance; return &instance; } size_t BenchmarkFamilies::AddBenchmark( std::unique_ptr family) { MutexLock l(mutex_); size_t index = families_.size(); families_.push_back(std::move(family)); return index; } void BenchmarkFamilies::ClearBenchmarks() { MutexLock l(mutex_); families_.clear(); families_.shrink_to_fit(); } bool BenchmarkFamilies::FindBenchmarks( std::string spec, std::vector* benchmarks, std::ostream* ErrStream) { BM_CHECK(ErrStream); auto& Err = *ErrStream; // Make regular expression out of command-line flag std::string error_msg; Regex re; bool is_negative_filter = false; if (spec[0] == '-') { spec.replace(0, 1, ""); is_negative_filter = true; } if (!re.Init(spec, &error_msg)) { Err << "Could not compile benchmark re: " << error_msg << '\n'; return 
false; } // Special list of thread counts to use when none are specified const std::vector one_thread = {1}; int next_family_index = 0; MutexLock l(mutex_); for (std::unique_ptr& family : families_) { int family_index = next_family_index; int per_family_instance_index = 0; // Family was deleted or benchmark doesn't match if (!family) { continue; } if (family->ArgsCnt() == -1) { family->Args({}); } const std::vector* thread_counts = (family->thread_counts_.empty() ? &one_thread : &static_cast&>(family->thread_counts_)); const size_t family_size = family->args_.size() * thread_counts->size(); // The benchmark will be run at least 'family_size' different inputs. // If 'family_size' is very large warn the user. if (family_size > kMaxFamilySize) { Err << "The number of inputs is very large. " << family->name_ << " will be repeated at least " << family_size << " times.\n"; } // reserve in the special case the regex ".", since we know the final // family size. this doesn't take into account any disabled benchmarks // so worst case we reserve more than we need. if (spec == ".") { benchmarks->reserve(benchmarks->size() + family_size); } for (auto const& args : family->args_) { for (int num_threads : *thread_counts) { BenchmarkInstance instance(family.get(), family_index, per_family_instance_index, args, num_threads); const auto full_name = instance.name().str(); if (full_name.rfind(kDisabledPrefix, 0) != 0 && ((re.Match(full_name) && !is_negative_filter) || (!re.Match(full_name) && is_negative_filter))) { benchmarks->push_back(std::move(instance)); ++per_family_instance_index; // Only bump the next family index once we've established that // at least one instance of this family will be run. 
if (next_family_index == family_index) { ++next_family_index; } } } } } return true; } benchmark::Benchmark* RegisterBenchmarkInternal( std::unique_ptr bench) { benchmark::Benchmark* bench_ptr = bench.get(); BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); families->AddBenchmark(std::move(bench)); return bench_ptr; } // FIXME: This function is a hack so that benchmark.cc can access // `BenchmarkFamilies` bool FindBenchmarksInternal(const std::string& re, std::vector* benchmarks, std::ostream* Err) { return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); } } // end namespace internal //=============================================================================// // Benchmark //=============================================================================// Benchmark::Benchmark(const std::string& name) : name_(name), aggregation_report_mode_(internal::ARM_Unspecified), time_unit_(GetDefaultTimeUnit()), use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), min_warmup_time_(0), iterations_(0), repetitions_(0), measure_process_cpu_time_(false), use_real_time_(false), use_manual_time_(false), complexity_(oNone), complexity_lambda_(nullptr) { ComputeStatistics("mean", StatisticsMean); ComputeStatistics("median", StatisticsMedian); ComputeStatistics("stddev", StatisticsStdDev); ComputeStatistics("cv", StatisticsCV, kPercentage); } Benchmark::~Benchmark() {} Benchmark* Benchmark::Name(const std::string& name) { SetName(name); return this; } Benchmark* Benchmark::Arg(int64_t x) { BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); args_.push_back({x}); return this; } Benchmark* Benchmark::Unit(TimeUnit unit) { time_unit_ = unit; use_default_time_unit_ = false; return this; } Benchmark* Benchmark::Range(int64_t start, int64_t limit) { BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); std::vector arglist; internal::AddRange(&arglist, start, limit, range_multiplier_); for (int64_t i : arglist) { args_.push_back({i}); } 
// Registers the cartesian product of `arglists`: one argument tuple for every
// way of picking one value from each list. The first list varies fastest.
//
// If any list is empty the product is empty and nothing is registered. An
// empty `arglists` registers exactly one empty tuple.
Benchmark* Benchmark::ArgsProduct(
    const std::vector<std::vector<int64_t>>& arglists) {
  BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));

  // indices[k] is the position currently selected in arglists[k].
  std::vector<std::size_t> indices(arglists.size());
  const std::size_t total = std::accumulate(
      std::begin(arglists), std::end(arglists), std::size_t{1},
      [](const std::size_t res, const std::vector<int64_t>& arglist) {
        return res * arglist.size();
      });
  std::vector<int64_t> args;
  args.reserve(arglists.size());
  for (std::size_t i = 0; i < total; i++) {
    for (std::size_t arg = 0; arg < arglists.size(); arg++) {
      args.push_back(arglists[arg][indices[arg]]);
    }
    args_.push_back(args);
    args.clear();

    // Advance the indices like an odometer: bump the lowest position and
    // carry into the next one whenever a position wraps back to zero.
    // The bound is tested before each access, so an empty `arglists` is never
    // indexed. (No list is empty here, otherwise `total` would be zero.)
    for (std::size_t arg = 0; arg < arglists.size(); arg++) {
      indices[arg] = (indices[arg] + 1) % arglists[arg].size();
      if (indices[arg] != 0) {
        break;
      }
    }
  }

  return this;
}

// Sets the display name of the single argument of this benchmark.
Benchmark* Benchmark::ArgName(const std::string& name) {
  BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
  arg_names_ = {name};
  return this;
}

// Sets the display names of all arguments; the number of names must agree
// with the argument count established so far (if any).
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
  BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
  arg_names_ = names;
  return this;
}

// Registers every value start, start + step, ... that does not exceed
// `limit` as a single-argument run.
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
  BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
  BM_CHECK_LE(start, limit);
  // A step below 1 would never reach `limit`; validate it the same way
  // DenseThreadRange validates its stride.
  BM_CHECK_GE(step, 1);
  for (int64_t arg = start; arg <= limit; arg += step) {
    args_.push_back({arg});
  }
  return this;
}
// Sets the minimum measurement time for this benchmark.
// `t` is checked to be strictly positive, and no explicit iteration count may
// have been set (iterations_ must still be 0): a minimum time and a fixed
// iteration count are mutually exclusive.
Benchmark* Benchmark::MinTime(double t) {
  BM_CHECK(t > 0.0);
  BM_CHECK(iterations_ == 0);
  min_time_ = t;
  return this;
}

// Sets the minimum warm-up time for this benchmark.
// Unlike MinTime(), zero is accepted here (`t >= 0.0`). As with MinTime(), it
// cannot be combined with an explicit iteration count.
Benchmark* Benchmark::MinWarmUpTime(double t) {
  BM_CHECK(t >= 0.0);
  BM_CHECK(iterations_ == 0);
  min_warmup_time_ = t;
  return this;
}

// Fixes the number of iterations for this benchmark.
// `n` is checked to be positive, and neither a minimum time nor a minimum
// warm-up time may have been set on this benchmark beforehand (both must
// still be zero) -- the counterpart of the checks in MinTime() and
// MinWarmUpTime().
Benchmark* Benchmark::Iterations(IterationCount n) {
  BM_CHECK(n > 0);
  BM_CHECK(internal::IsZero(min_time_));
  BM_CHECK(internal::IsZero(min_warmup_time_));
  iterations_ = n;
  return this;
}
// Registers the thread counts min_threads, min_threads + stride, ... that lie
// strictly below max_threads, followed by max_threads itself, so the upper
// bound is always included exactly once.
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
                                       int stride) {
  BM_CHECK_GT(min_threads, 0);
  BM_CHECK_GE(max_threads, min_threads);
  BM_CHECK_GE(stride, 1);

  // Every stepped value short of the upper bound...
  int count = min_threads;
  while (count < max_threads) {
    thread_counts_.push_back(count);
    count += stride;
  }
  // ...and the upper bound last.
  thread_counts_.push_back(max_threads);

  return this;
}
Benchmark::GetArgName(int arg) const { BM_CHECK_GE(arg, 0); size_t uarg = static_cast(arg); BM_CHECK_LT(uarg, arg_names_.size()); return arg_names_[uarg].c_str(); } TimeUnit Benchmark::GetTimeUnit() const { return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_; } namespace internal { //=============================================================================// // FunctionBenchmark //=============================================================================// void FunctionBenchmark::Run(State& st) { func_(st); } } // end namespace internal void ClearRegisteredBenchmarks() { internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); } std::vector CreateRange(int64_t lo, int64_t hi, int multi) { std::vector args; internal::AddRange(&args, lo, hi, multi); return args; } std::vector CreateDenseRange(int64_t start, int64_t limit, int step) { BM_CHECK_LE(start, limit); std::vector args; for (int64_t arg = start; arg <= limit; arg += step) { args.push_back(arg); } return args; } } // end namespace benchmark ================================================ FILE: src/benchmark_register.h ================================================ #ifndef BENCHMARK_REGISTER_H #define BENCHMARK_REGISTER_H #include #include #include #include #include "check.h" namespace benchmark { namespace internal { // Append the powers of 'mult' in the closed interval [lo, hi]. // Returns iterator to the start of the inserted range. 
template typename std::vector::iterator AddPowers(std::vector* dst, T lo, T hi, int mult) { BM_CHECK_GE(lo, 0); BM_CHECK_GE(hi, lo); BM_CHECK_GE(mult, 2); const size_t start_offset = dst->size(); static const T kmax = std::numeric_limits::max(); // Space out the values in multiples of "mult" for (T i = static_cast(1); i <= hi; i = static_cast(i * mult)) { if (i >= lo) { dst->push_back(i); } // Break the loop here since multiplying by // 'mult' would move outside of the range of T if (i > kmax / mult) break; } return dst->begin() + static_cast(start_offset); } template void AddNegatedPowers(std::vector* dst, T lo, T hi, int mult) { // We negate lo and hi so we require that they cannot be equal to 'min'. BM_CHECK_GT(lo, std::numeric_limits::min()); BM_CHECK_GT(hi, std::numeric_limits::min()); BM_CHECK_GE(hi, lo); BM_CHECK_LE(hi, 0); // Add positive powers, then negate and reverse. // Casts necessary since small integers get promoted // to 'int' when negating. const auto lo_complement = static_cast(-lo); const auto hi_complement = static_cast(-hi); const auto it = AddPowers(dst, hi_complement, lo_complement, mult); std::for_each(it, dst->end(), [](T& t) { t = static_cast(t * -1); }); std::reverse(it, dst->end()); } template void AddRange(std::vector* dst, T lo, T hi, int mult) { static_assert(std::is_integral::value && std::is_signed::value, "Args type must be a signed integer"); BM_CHECK_GE(hi, lo); BM_CHECK_GE(mult, 2); // Add "lo" dst->push_back(lo); // Handle lo == hi as a special case, so we then know // lo < hi and so it is safe to add 1 to lo and subtract 1 // from hi without falling outside of the range of T. if (lo == hi) return; // Ensure that lo_inner <= hi_inner below. if (lo + 1 == hi) { dst->push_back(hi); return; } // Add all powers of 'mult' in the range [lo+1, hi-1] (inclusive). 
const auto lo_inner = static_cast(lo + 1); const auto hi_inner = static_cast(hi - 1); // Insert negative values if (lo_inner < 0) { AddNegatedPowers(dst, lo_inner, std::min(hi_inner, T{-1}), mult); } // Treat 0 as a special case (see discussion on #762). if (lo < 0 && hi >= 0) { dst->push_back(0); } // Insert positive values if (hi_inner > 0) { AddPowers(dst, std::max(lo_inner, T{1}), hi_inner, mult); } // Add "hi" (if different from last value). if (hi != dst->back()) { dst->push_back(hi); } } } // namespace internal } // namespace benchmark #endif // BENCHMARK_REGISTER_H ================================================ FILE: src/benchmark_runner.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "benchmark_runner.h" #include "benchmark/benchmark_api.h" #include "benchmark/managers.h" #include "benchmark/reporter.h" #include "benchmark/state.h" #include "benchmark/types.h" #include "benchmark_api_internal.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS #if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) && \ !defined(BENCHMARK_OS_WASI) #include #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "check.h" #include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" #include "counter.h" #include "log.h" #include "mutex.h" #include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" #include "thread_manager.h" #include "thread_timer.h" namespace benchmark { BM_DECLARE_bool(benchmark_dry_run); BM_DECLARE_string(benchmark_min_time); BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); BM_DECLARE_bool(benchmark_display_aggregates_only); BM_DECLARE_string(benchmark_perf_counters); namespace internal { MemoryManager* memory_manager = nullptr; ProfilerManager* profiler_manager = nullptr; namespace { constexpr IterationCount kMaxIterations = 1000000000000; const double kDefaultMinTime = std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr); BenchmarkReporter::Run CreateRunReport( const benchmark::internal::BenchmarkInstance& b, const internal::ThreadManager::Result& results, IterationCount memory_iterations, const MemoryManager::Result& memory_result, double seconds, int64_t repetition_index, int64_t repeats) { // Create report about this benchmark run. 
BenchmarkReporter::Run report; report.run_name = b.name(); report.family_index = b.family_index(); report.per_family_instance_index = b.per_family_instance_index(); report.skipped = results.skipped_; report.skip_message = results.skip_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. report.iterations = results.iterations; report.time_unit = b.time_unit(); report.threads = b.threads(); report.repetition_index = repetition_index; report.repetitions = repeats; if (report.skipped == 0u) { if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; } else { report.real_accumulated_time = results.real_time_used; } report.use_real_time_for_initial_big_o = b.use_manual_time(); report.cpu_accumulated_time = results.cpu_time_used; report.complexity_n = results.complexity_n; report.complexity = b.complexity(); report.complexity_lambda = b.complexity_lambda(); report.statistics = &b.statistics(); report.counters = results.counters; if (memory_iterations > 0) { report.memory_result = memory_result; report.allocs_per_iter = memory_iterations != 0 ? static_cast(memory_result.num_allocs) / static_cast(memory_iterations) : 0; } // The CPU time is the total time taken by all thread. If we used that as // the denominator, we'd be calculating the rate per thread here. This is // why we have to divide the total cpu_time by the number of threads for // global counters to get a global rate. const double thread_seconds = seconds / b.threads(); internal::Finish(&report.counters, results.iterations, thread_seconds, b.threads()); } return report; } // Execute one thread of benchmark b for the specified number of iterations. // Adds the stats collected for the thread into manager->results. 
void RunInThread(const BenchmarkInstance* b, IterationCount iters, int thread_id, ThreadManager* manager, PerfCountersMeasurement* perf_counters_measurement, ProfilerManager* profiler_manager_) { internal::ThreadTimer timer( b->measure_process_cpu_time() ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); State st = b->Run(iters, thread_id, &timer, manager, perf_counters_measurement, profiler_manager_); if (!(st.skipped() || st.iterations() >= st.max_iterations)) { st.SkipWithError( "The benchmark didn't run, nor was it explicitly skipped. Please call " "'SkipWithXXX` in your benchmark as appropriate."); } { MutexLock l(manager->GetBenchmarkMutex()); internal::ThreadManager::Result& results = manager->results; results.iterations += st.iterations(); results.cpu_time_used += timer.cpu_time_used(); results.real_time_used += timer.real_time_used(); results.manual_time_used += timer.manual_time_used(); results.complexity_n += st.complexity_length_n(); internal::Increment(&results.counters, st.counters); } manager->NotifyThreadComplete(); } double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b, const BenchTimeType& iters_or_time) { if (!IsZero(b.min_time())) { return b.min_time(); } // If the flag was used to specify number of iters, then return the default // min_time. if (iters_or_time.tag == BenchTimeType::ITERS) { return kDefaultMinTime; } return iters_or_time.time; } IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b, const BenchTimeType& iters_or_time) { if (b.iterations() != 0) { return b.iterations(); } // We've already concluded that this flag is currently used to pass // iters but do a check here again anyway. 
BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS); return iters_or_time.iters; } class ThreadRunnerDefault : public ThreadRunnerBase { public: explicit ThreadRunnerDefault(int num_threads) : pool(static_cast(num_threads - 1)) {} void RunThreads(const std::function& fn) override final { // Run all but one thread in separate threads for (std::size_t ti = 0; ti < pool.size(); ++ti) { pool[ti] = std::thread(fn, static_cast(ti + 1)); } // And run one thread here directly. // (If we were asked to run just one thread, we don't create new threads.) // Yes, we need to do this here *after* we start the separate threads. fn(0); // The main thread has finished. Now let's wait for the other threads. for (std::thread& thread : pool) { thread.join(); } } private: std::vector pool; }; std::unique_ptr GetThreadRunner( const benchmark::threadrunner_factory& userThreadRunnerFactory, int num_threads) { return userThreadRunnerFactory ? userThreadRunnerFactory(num_threads) : std::make_unique(num_threads); } } // end namespace BenchTimeType ParseBenchMinTime(const std::string& value) { BenchTimeType ret = {}; if (value.empty()) { ret.tag = BenchTimeType::TIME; ret.time = 0.0; return ret; } if (value.back() == 'x') { char* p_end = nullptr; // Reset errno before it's changed by strtol. errno = 0; IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10); // After a valid parse, p_end should have been set to // point to the 'x' suffix. BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x') << "Malformed iters value passed to --benchmark_min_time: `" << value << "`. Expected --benchmark_min_time=x."; ret.tag = BenchTimeType::ITERS; ret.iters = num_iters; return ret; } bool has_suffix = value.back() == 's'; if (!has_suffix) { BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. " "Eg., `30s` for 30-seconds."; } char* p_end = nullptr; // Reset errno before it's changed by strtod. 
errno = 0; double min_time = std::strtod(value.c_str(), &p_end); // After a successful parse, p_end should point to the suffix 's', // or the end of the string if the suffix was omitted. BM_CHECK(errno == 0 && p_end != nullptr && ((has_suffix && *p_end == 's') || *p_end == '\0')) << "Malformed seconds value passed to --benchmark_min_time: `" << value << "`. Expected --benchmark_min_time=x."; ret.tag = BenchTimeType::TIME; ret.time = min_time; return ret; } BenchmarkRunner::BenchmarkRunner( const benchmark::internal::BenchmarkInstance& b_, PerfCountersMeasurement* pcm_, BenchmarkReporter::PerFamilyRunReports* reports_for_family_) : b(b_), reports_for_family(reports_for_family_), parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), min_time(FLAGS_benchmark_dry_run ? 0 : ComputeMinTime(b_, parsed_benchtime_flag)), min_warmup_time( FLAGS_benchmark_dry_run ? 0 : ((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time)), warmup_done(FLAGS_benchmark_dry_run ? true : !(min_warmup_time > 0.0)), repeats(FLAGS_benchmark_dry_run ? 1 : (b.repetitions() != 0 ? b.repetitions() : FLAGS_benchmark_repetitions)), has_explicit_iteration_count(b.iterations() != 0 || parsed_benchtime_flag.tag == BenchTimeType::ITERS), thread_runner( GetThreadRunner(b.GetUserThreadRunnerFactory(), b.threads())), iters(FLAGS_benchmark_dry_run ? 1 : (has_explicit_iteration_count ? 
// Runs the benchmark once with the current `iters` on every thread and
// returns the accumulated measurements, the per-thread iteration count
// actually executed, and the time (cpu, real or manual, depending on the
// benchmark's settings) that later decisions are based on.
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
  BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";

  auto manager = std::make_unique<internal::ThreadManager>(b.threads());

  // Work executed by each benchmark thread.
  const auto run_one_thread = [&](int thread_idx) {
    RunInThread(&b, iters, thread_idx, manager.get(),
                perf_counters_measurement_ptr, /*profiler_manager=*/nullptr);
  };
  thread_runner->RunThreads(run_one_thread);

  IterationResults outcome;
  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
  {
    MutexLock lock(manager->GetBenchmarkMutex());
    outcome.results = manager->results;
  }

  // And get rid of the manager.
  manager.reset();

  BM_VLOG(2) << "Ran in " << outcome.results.cpu_time_used << "/"
             << outcome.results.real_time_used << "\n";

  // By using KeepRunningBatch a benchmark can iterate more times than
  // requested, so take the iteration count from the collected results.
  outcome.iters = outcome.results.iterations / b.threads();

  // Base decisions off of manual or real time if requested by this
  // benchmark, and off of CPU time otherwise.
  if (b.use_manual_time()) {
    outcome.seconds = outcome.results.manual_time_used;
  } else if (b.use_real_time()) {
    outcome.seconds = outcome.results.real_time_used;
  } else {
    outcome.seconds = outcome.results.cpu_time_used;
  }

  return outcome;
}
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
          !b.use_manual_time());
}

// Returns the minimum run time that applies to the current phase:
// min_warmup_time until the warmup has finished, min_time afterwards.
double BenchmarkRunner::GetMinTimeToApply() const {
  // In order to reuse functionality to run and measure benchmarks for running
  // a warmup phase of the benchmark, we need a way of telling whether to apply
  // min_time or min_warmup_time. This function will figure out if we are in the
  // warmup phase and therefore need to apply min_warmup_time or if we are
  // already in the benchmarking phase and min_time needs to be applied.
  return warmup_done ? min_time : min_warmup_time;
}

// Marks the warmup phase as finished and sets the iteration count to `i`.
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
  warmup_done = true;
  iters = i;
}

// Runs the benchmark repeatedly, growing the iteration count, until
// ShouldReportIterationResults() accepts a run; the results are discarded
// and `iters` is restored to the value it had on entry.
void BenchmarkRunner::RunWarmUp() {
  // Use the same mechanisms for warming up the benchmark as used for actually
  // running and measuring the benchmark.
  IterationResults i_warmup;
  // Don't use the iterations determined in the warmup phase for the actual
  // measured benchmark phase. While this may be a good starting point for the
  // benchmark and it would therefore get rid of the need to figure out how many
  // iterations are needed if min_time is set again, this may also be a
  // completely wrong guess since the warmup loops might be considerably slower
  // (e.g. because of caching effects).
  const IterationCount i_backup = iters;

  for (;;) {
    b.Setup();
    i_warmup = DoNIterations();
    b.Teardown();

    const bool finish = ShouldReportIterationResults(i_warmup);

    if (finish) {
      FinishWarmUp(i_backup);
      break;
    }

    // Although we are running "only" a warmup phase where running enough
    // iterations at once without measuring time isn't as important as it is for
    // the benchmarking phase, we still do it the same way as otherwise it is
    // very confusing for the user to know how to choose a proper value for
    // min_warmup_time if a different approach on running it is used.
iters = PredictNumItersNeeded(i_warmup);
    assert(iters > i_warmup.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }
}

// Runs the benchmark single-threaded for `memory_iterations` iterations
// between memory_manager->Start() and memory_manager->Stop(), and returns the
// result filled in by Stop() with memory_iterations recorded on it.
MemoryManager::Result BenchmarkRunner::RunMemoryManager(
    IterationCount memory_iterations) {
  memory_manager->Start();
  // NOTE(review): template argument appears lost in extraction - presumably
  // internal::ThreadManager; confirm.
  std::unique_ptr manager;
  manager.reset(new internal::ThreadManager(1));
  b.Setup();
  RunInThread(&b, memory_iterations, 0, manager.get(),
              perf_counters_measurement_ptr,
              /*profiler_manager=*/nullptr);
  manager.reset();
  b.Teardown();
  MemoryManager::Result memory_result;
  memory_manager->Stop(memory_result);
  memory_result.memory_iterations = memory_iterations;
  return memory_result;
}

// Runs the benchmark single-threaded for `profile_iterations` iterations with
// the global profiler_manager passed to RunInThread and no perf counters.
void BenchmarkRunner::RunProfilerManager(IterationCount profile_iterations) {
  // NOTE(review): template argument appears lost in extraction - presumably
  // internal::ThreadManager; confirm.
  std::unique_ptr manager;
  manager.reset(new internal::ThreadManager(1));
  b.Setup();
  RunInThread(&b, profile_iterations, 0, manager.get(),
              /*perf_counters_measurement_ptr=*/nullptr,
              /*profiler_manager=*/profiler_manager);
  manager.reset();
  b.Teardown();
}

// Performs one repetition: runs the warmup if it has not been done yet, finds
// a sufficient iteration count (first repetition only), optionally collects
// memory/profiler data, and records the resulting run report.
void BenchmarkRunner::DoOneRepetition() {
  assert(HasRepeatsRemaining() && "Already done all repetitions?");

  const bool is_the_first_repetition = num_repetitions_done == 0;

  // In case a warmup phase is requested by the benchmark, run it now.
  // After running the warmup phase the BenchmarkRunner should be in a state as
  // if this warmup never happened, except for the fact that warmup_done is
  // set. Every other manipulation of the BenchmarkRunner instance would be a
  // bug! Please fix it.
  if (!warmup_done) {
    RunWarmUp();
  }

  IterationResults i;
  // We *may* be gradually increasing the length (iteration count)
  // of the benchmark until we decide the results are significant.
  // And once we do, we report those last results and exit.
  // Please do note that if there are repetitions, the iteration count
  // is *only* calculated for the *first* repetition, and other repetitions
  // simply use that precomputed iteration count.
for (;;) {
    b.Setup();
    i = DoNIterations();
    b.Teardown();

    // Do we consider the results to be significant?
    // If we are doing repetitions, and the first repetition was already done,
    // it has calculated the correct iteration count, so we have run that very
    // iteration count just now. No need to calculate anything. Just report.
    // The same holds when the iteration count was given explicitly.
    // Else, the normal rules apply.
    const bool results_are_significant = !is_the_first_repetition ||
                                         has_explicit_iteration_count ||
                                         ShouldReportIterationResults(i);

    // Good, let's report them!
    if (results_are_significant) {
      break;
    }

    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
    // iteration count, and run the benchmark again...

    iters = PredictNumItersNeeded(i);
    assert(iters > i.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }

  // Produce memory measurements if requested.
  MemoryManager::Result memory_result;
  IterationCount memory_iterations = 0;
  if (memory_manager != nullptr) {
    // Only run a few iterations to reduce the impact of one-time
    // allocations in benchmarks that are not properly managed.
    // NOTE(review): an explicit template argument of std::min appears to have
    // been lost in extraction - presumably IterationCount; confirm.
    memory_iterations = std::min(16, iters);
    memory_result = RunMemoryManager(memory_iterations);
  }

  if (profiler_manager != nullptr) {
    // We want to externally profile the benchmark for the same number of
    // iterations because, for example, if we're tracing the benchmark then we
    // want trace data to reasonably match PMU data.
    RunProfilerManager(iters);
  }

  // Ok, now actually report.
BenchmarkReporter::Run report =
      CreateRunReport(b, i.results, memory_iterations, memory_result,
                      i.seconds, num_repetitions_done, repeats);

  if (reports_for_family != nullptr) {
    ++reports_for_family->num_runs_done;
    // Skipped runs are counted above but not stored with the family's runs.
    if (report.skipped == 0u) {
      reports_for_family->Runs.push_back(report);
    }
  }

  // The per-instance list keeps every report, skipped or not.
  run_results.non_aggregates.push_back(report);

  ++num_repetitions_done;
}

// Computes the aggregate statistics over all repetitions and hands the
// accumulated results to the caller. Must only be called once all repetitions
// have been run; run_results is moved-from afterwards.
RunResults&& BenchmarkRunner::GetResults() {
  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");

  // Calculate additional statistics over the repetitions of this instance.
  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

  return std::move(run_results);
}

}  // end namespace internal

}  // end namespace benchmark

================================================
FILE: src/benchmark_runner.h
================================================
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_RUNNER_H_
#define BENCHMARK_RUNNER_H_

// NOTE(review): the names of the three system headers below appear to have
// been lost in extraction; restore them from the upstream file.
#include
#include
#include

#include "benchmark_api_internal.h"
#include "perf_counters.h"
#include "thread_manager.h"

namespace benchmark {

namespace internal {

// Optional global hooks; the runner only uses each one when it is non-null.
extern MemoryManager* memory_manager;
extern ProfilerManager* profiler_manager;

// Everything produced for one benchmark instance.
// NOTE(review): the element types of the two vectors appear to have been lost
// in extraction - presumably BenchmarkReporter::Run; confirm.
struct RunResults {
  // One report per repetition.
  std::vector non_aggregates;
  // Statistics computed over non_aggregates.
  std::vector aggregates_only;

  bool display_report_aggregates_only = false;
  bool file_report_aggregates_only = false;
};

// A parsed "min time" value.
// presumably `tag` selects which union member is valid - verify against
// ParseBenchMinTime.
struct BENCHMARK_EXPORT BenchTimeType {
  enum { UNSPECIFIED, ITERS, TIME } tag;
  union {
    IterationCount iters;
    double time;
  };
};

BENCHMARK_EXPORT
BenchTimeType ParseBenchMinTime(const std::string& value);

// Drives a single BenchmarkInstance: an optional warmup followed by
// GetNumRepeats() repetitions, each started through DoOneRepetition().
class BenchmarkRunner {
 public:
  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
                  benchmark::internal::PerfCountersMeasurement* pcm_,
                  BenchmarkReporter::PerFamilyRunReports* reports_for_family);

  int GetNumRepeats() const { return repeats; }

  bool HasRepeatsRemaining() const {
    return GetNumRepeats() != num_repetitions_done;
  }

  // Runs exactly one repetition; requires HasRepeatsRemaining().
  void DoOneRepetition();

  // Returns the accumulated results; requires !HasRepeatsRemaining().
  RunResults&& GetResults();

  BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
    return reports_for_family;
  }

  double GetMinTime() const { return min_time; }

  bool HasExplicitIters() const { return has_explicit_iteration_count; }

  IterationCount GetIters() const { return iters; }

 private:
  RunResults run_results;

  const benchmark::internal::BenchmarkInstance& b;
  BenchmarkReporter::PerFamilyRunReports* reports_for_family;

  BenchTimeType parsed_benchtime_flag;
  const double min_time;
  const double min_warmup_time;
  // Selects between min_warmup_time (false) and min_time (true).
  bool warmup_done;
  const int repeats;
  const bool has_explicit_iteration_count;

  int num_repetitions_done = 0;

  // NOTE(review): template argument appears lost in extraction; confirm the
  // pointee type against the upstream header.
  std::unique_ptr thread_runner;

  IterationCount iters;  // preserved between repetitions!
  // So only the first repetition has to find/calculate it,
  // the other repetitions will just use that precomputed iteration count.
PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;

  // Outcome of one DoNIterations() call.
  struct IterationResults {
    internal::ThreadManager::Result results;
    // Iterations per thread (total iterations / thread count).
    IterationCount iters;
    // CPU, real or manual time, depending on the benchmark's settings.
    double seconds;
  };
  IterationResults DoNIterations();

  MemoryManager::Result RunMemoryManager(IterationCount memory_iterations);

  void RunProfilerManager(IterationCount profile_iterations);

  IterationCount PredictNumItersNeeded(const IterationResults& i) const;

  bool ShouldReportIterationResults(const IterationResults& i) const;

  double GetMinTimeToApply() const;

  void FinishWarmUp(const IterationCount& i);

  void RunWarmUp();
};

}  // namespace internal

}  // end namespace benchmark

#endif  // BENCHMARK_RUNNER_H_

================================================
FILE: src/check.cc
================================================
#include "check.h"

namespace benchmark {
namespace internal {

namespace {
// The handler invoked when a BM_CHECK fails; std::abort unless replaced
// through GetAbortHandler().
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
AbortHandlerT* handler = &std::abort;
}  // namespace

// Returns a mutable reference to the handler pointer so that callers can
// install their own handler.
BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }

}  // namespace internal
}  // namespace benchmark

================================================
FILE: src/check.h
================================================
#ifndef CHECK_H_
#define CHECK_H_

// NOTE(review): the names of the three system headers below appear to have
// been lost in extraction; restore them from the upstream file.
#include
#include
#include

#include "benchmark/export.h"
#include "internal_macros.h"
#include "log.h"

// BENCHMARK_NOEXCEPT / BENCHMARK_NOEXCEPT_OP expand to noexcept where the
// compiler is known to support it, and to nothing otherwise.
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define __func__ __FUNCTION__
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif

namespace benchmark {

namespace internal {

typedef void(AbortHandlerT)();

BENCHMARK_EXPORT
AbortHandlerT*& GetAbortHandler();

// Invokes the installed abort handler and never returns.
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()(); std::flush(std::cout); std::flush(std::cerr); std::abort(); // fallback to enforce noreturn } // CheckHandler is the class constructed by failing BM_CHECK macros. // CheckHandler will log information about the failures and abort when it is // destructed. class CheckHandler { public: CheckHandler(std::string_view check, std::string_view file, std::string_view func, int line) : log_(GetErrorLogInstance()) { log_ << file << ":" << line << ": " << func << ": Check `" << check << "' failed. "; } LogType& GetLog() { return log_; } #if defined(COMPILER_MSVC) #pragma warning(push) #pragma warning(disable : 4722) #endif BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << '\n'; CallAbortHandler(); } #if defined(COMPILER_MSVC) #pragma warning(pop) #endif CheckHandler& operator=(const CheckHandler&) = delete; CheckHandler(const CheckHandler&) = delete; CheckHandler() = delete; private: LogType& log_; }; } // end namespace internal } // end namespace benchmark // The BM_CHECK macro returns a std::ostream object that can have extra // information written to it. #ifndef NDEBUG #define BM_CHECK(b) \ (b ? 
::benchmark::internal::GetNullLogInstance()                            \
     : ::benchmark::internal::CheckHandler(                               \
           std::string_view(#b), std::string_view(__FILE__),              \
           std::string_view(__func__), __LINE__)                          \
           .GetLog())
#else
// With NDEBUG defined the condition is not evaluated at all; anything
// streamed into the macro goes to the null log.
#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()
#endif

// Comparison helpers built on BM_CHECK. The FLOAT variants compare with a
// tolerance `eps`: EQ/NE test |a - b| against eps, GE/LE allow the difference
// to fall short by less than eps, GT/LT require it to exceed eps.
// clang-format off
// preserve whitespacing between operators for alignment
#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b))
#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b))
#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b))
#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b))
#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b))
#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b))

#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) <  (eps))
#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps))
#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps))
#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps))
#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) >  (eps))
#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) >  (eps))
// clang-format on

#endif  // CHECK_H_

================================================
FILE: src/colorprint.cc
================================================
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "colorprint.h"

// NOTE(review): the system header names were lost in extraction; the list
// below covers every standard facility this file uses - confirm against the
// upstream file.
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>

#include "check.h"
#include "internal_macros.h"

#ifdef BENCHMARK_OS_WINDOWS
#include <io.h>
#include <windows.h>
#else
#include <unistd.h>
#endif  // BENCHMARK_OS_WINDOWS

namespace benchmark {
namespace {
#ifdef BENCHMARK_OS_WINDOWS
typedef WORD PlatformColorCode;
#else
typedef const char* PlatformColorCode;
#endif

// Maps a LogColor to the platform's representation: a console attribute mask
// on Windows, or the final digit of an ANSI "3x" foreground code elsewhere.
// Colors without a mapping yield 0 (Windows) or nullptr (other platforms).
PlatformColorCode GetPlatformColorCode(LogColor color) {
#ifdef BENCHMARK_OS_WINDOWS
  switch (color) {
    case COLOR_RED:
      return FOREGROUND_RED;
    case COLOR_GREEN:
      return FOREGROUND_GREEN;
    case COLOR_YELLOW:
      return FOREGROUND_RED | FOREGROUND_GREEN;
    case COLOR_BLUE:
      return FOREGROUND_BLUE;
    case COLOR_MAGENTA:
      return FOREGROUND_BLUE | FOREGROUND_RED;
    case COLOR_CYAN:
      return FOREGROUND_BLUE | FOREGROUND_GREEN;
    case COLOR_WHITE:  // fall through to default
    default:
      return 0;
  }
#else
  switch (color) {
    case COLOR_RED:
      return "1";
    case COLOR_GREEN:
      return "2";
    case COLOR_YELLOW:
      return "3";
    case COLOR_BLUE:
      return "4";
    case COLOR_MAGENTA:
      return "5";
    case COLOR_CYAN:
      return "6";
    case COLOR_WHITE:
      return "7";
    default:
      return nullptr;
  }
#endif
}

}  // end namespace

// Formats `msg` with `args` printf-style and returns the result.
// `args` itself is never consumed (only copies are), so the caller's va_list
// stays usable. Returns an empty string when the expansion is empty or when
// vsnprintf reports an error.
std::string FormatString(const char* msg, va_list args) {
  // we might need a second shot at this, so pre-emptively make a copy
  va_list args_cp;
  va_copy(args_cp, args);

  std::size_t size = 256;
  char local_buff[256];
  auto ret = vsnprintf(local_buff, size, msg, args_cp);

  va_end(args_cp);

  // A negative result is an encoding error. Abort in checked builds ...
  BM_CHECK(ret >= 0);
  // ... and degrade to an empty string when BM_CHECK is compiled out. Without
  // this guard the cast below would turn -1 into SIZE_MAX, `size` would wrap
  // to 0, and the returned string would be read from a zero-length buffer.
  if (ret <= 0) {  // error, or empty expansion
    return {};
  }

  if (static_cast<size_t>(ret) < size) {
    return local_buff;
  }

  // we did not provide a long enough buffer on our first attempt.
  size = static_cast<size_t>(ret) + 1;  // + 1 for the null byte
  std::unique_ptr<char[]> buff(new char[size]);
  va_list args_cp2;
  va_copy(args_cp2, args);
  ret = vsnprintf(buff.get(), size, msg, args_cp2);
  va_end(args_cp2);
  BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
  if (ret < 0) {  // same reasoning as above for unchecked builds
    return {};
  }
  return buff.get();
}

std::string FormatString(const char* msg, ...)
{
  // Variadic front end: packs the arguments and defers to the va_list overload.
  va_list args;
  va_start(args, msg);
  auto tmp = FormatString(msg, args);
  va_end(args);
  return tmp;
}

// Variadic front end of the va_list overload below.
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  ColorPrintf(out, color, fmt, args);
  va_end(args);
}

// Writes the printf-style formatted text to `out` in the given color.
// On Windows the color is applied through console attributes of the stdout
// handle; elsewhere ANSI escape sequences are written into `out`.
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt,
                 va_list args) {
#ifdef BENCHMARK_OS_WINDOWS
  ((void)out);  // suppress unused warning

  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);

  // Gets the current text color.
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
  const WORD original_color_attrs = buffer_info.wAttributes;

  // We need to flush the stream buffers into the console before each
  // SetConsoleTextAttribute call lest it affect the text that is already
  // printed but has not yet reached the console.
  out.flush();

  // Keep the current background; only the foreground color is replaced.
  const WORD original_background_attrs =
      original_color_attrs & (BACKGROUND_RED | BACKGROUND_GREEN |
                              BACKGROUND_BLUE | BACKGROUND_INTENSITY);

  SetConsoleTextAttribute(stdout_handle, GetPlatformColorCode(color) |
                                             FOREGROUND_INTENSITY |
                                             original_background_attrs);
  out << FormatString(fmt, args);

  out.flush();
  // Restores the text and background color.
  SetConsoleTextAttribute(stdout_handle, original_color_attrs);
#else
  const char* color_code = GetPlatformColorCode(color);
  // No escape prefix is written for colors without a code, but the reset
  // sequence below is emitted unconditionally.
  if (color_code != nullptr) {
    out << FormatString("\033[0;3%sm", color_code);
  }
  out << FormatString(fmt, args) << "\033[m";
#endif
}

// Returns true if stdout appears to be a terminal that supports colored
// output, false otherwise.
bool IsColorTerminal() {
#if BENCHMARK_OS_WINDOWS
  // On Windows the TERM variable is usually not set, but the
  // console there does support colors.
  return 0 != _isatty(_fileno(stdout));
#else
  // On non-Windows platforms, we rely on the TERM variable. This list of
  // supported TERM values is copied from Google Test
  // (the link that followed here appears to have been lost in extraction).
const char* const SUPPORTED_TERM_VALUES[] = {
      "xterm",
      "xterm-color",
      "xterm-256color",
      "screen",
      "screen-256color",
      "tmux",
      "tmux-256color",
      "rxvt-unicode",
      "rxvt-unicode-256color",
      "linux",
      "cygwin",
      "xterm-kitty",
      "alacritty",
      "foot",
      "foot-extra",
      "wezterm",
  };

  const char* const term = getenv("TERM");

  // TERM must match one of the entries exactly; an unset TERM never matches.
  bool term_supports_color = false;
  for (const char* candidate : SUPPORTED_TERM_VALUES) {
    if ((term != nullptr) && 0 == strcmp(term, candidate)) {
      term_supports_color = true;
      break;
    }
  }

  // Color is only used when stdout is a terminal AND TERM is recognized.
  return 0 != isatty(fileno(stdout)) && term_supports_color;
#endif  // BENCHMARK_OS_WINDOWS
}

}  // end namespace benchmark

================================================
FILE: src/colorprint.h
================================================
#ifndef BENCHMARK_COLORPRINT_H_
#define BENCHMARK_COLORPRINT_H_

// NOTE(review): the names of the three system headers below appear to have
// been lost in extraction; restore them from the upstream file.
#include
#include
#include

#include "internal_macros.h"

namespace benchmark {
enum LogColor {
  COLOR_DEFAULT,
  COLOR_RED,
  COLOR_GREEN,
  COLOR_YELLOW,
  COLOR_BLUE,
  COLOR_MAGENTA,
  COLOR_CYAN,
  COLOR_WHITE
};

// printf-style formatting into a std::string.
PRINTF_FORMAT_STRING_FUNC(1, 0)
std::string FormatString(const char* msg, va_list args);
PRINTF_FORMAT_STRING_FUNC(1, 2) std::string FormatString(const char* msg, ...);

// printf-style formatted output to `out` in the given color.
PRINTF_FORMAT_STRING_FUNC(3, 0)
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt,
                 va_list args);
PRINTF_FORMAT_STRING_FUNC(3, 4)
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...);

// Returns true if stdout appears to be a terminal that supports colored
// output, false otherwise.
bool IsColorTerminal();

}  // end namespace benchmark

#endif  // BENCHMARK_COLORPRINT_H_

================================================
FILE: src/commandlineflags.cc
================================================
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "commandlineflags.h" #include #include #include #include #include #include #include #include #include "../src/string_util.h" namespace benchmark { namespace { // Parses 'str' for a 32-bit signed integer. If successful, writes // the result to *value and returns true; otherwise leaves *value // unchanged and returns false. bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) { // Parses the environment variable as a decimal integer. char* end = nullptr; const long long_value = strtol(str, &end, 10); // NOLINT // Has strtol() consumed all characters in the string? if (*end != '\0') { // No - an invalid character was encountered. std::cerr << src_text << " is expected to be a 32-bit integer, " << "but actually has value \"" << str << "\".\n"; return false; } // Is the parsed value in the range of an Int32? const int32_t result = static_cast(long_value); if (long_value == std::numeric_limits::max() || long_value == std::numeric_limits::min() || // The parsed value overflows as a long. (strtol() returns // LONG_MAX or LONG_MIN when the input overflows.) result != long_value // The parsed value overflows as an Int32. ) { std::cerr << src_text << " is expected to be a 32-bit integer, " << "but actually has value \"" << str << "\", " << "which overflows.\n"; return false; } *value = result; return true; } // Parses 'str' for a double. If successful, writes the result to *value and // returns true; otherwise leaves *value unchanged and returns false. 
bool ParseDouble(const std::string& src_text, const char* str, double* value) { // Parses the environment variable as a decimal integer. char* end = nullptr; const double double_value = strtod(str, &end); // NOLINT // Has strtol() consumed all characters in the string? if (*end != '\0') { // No - an invalid character was encountered. std::cerr << src_text << " is expected to be a double, " << "but actually has value \"" << str << "\".\n"; return false; } *value = double_value; return true; } // Parses 'str' into KV pairs. If successful, writes the result to *value and // returns true; otherwise leaves *value unchanged and returns false. bool ParseKvPairs(const std::string& src_text, const char* str, std::map* value) { std::map kvs; for (const auto& kvpair : StrSplit(str, ',')) { const auto kv = StrSplit(kvpair, '='); if (kv.size() != 2) { std::cerr << src_text << " is expected to be a comma-separated list of " << "= strings, but actually has value \"" << str << "\".\n"; return false; } if (!kvs.emplace(kv[0], kv[1]).second) { std::cerr << src_text << " is expected to contain unique keys but key \"" << kv[0] << "\" was repeated.\n"; return false; } } *value = kvs; return true; } // Returns the name of the environment variable corresponding to the // given flag. For example, FlagToEnvVar("foo") will return // "BENCHMARK_FOO" in the open-source version. std::string FlagToEnvVar(const char* flag) { const std::string flag_str(flag); std::string env_var; for (size_t i = 0; i != flag_str.length(); ++i) { env_var += static_cast(::toupper(flag_str.c_str()[i])); } return env_var; } } // namespace BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); return value_str == nullptr ? 
default_val : IsTruthyFlagValue(value_str);
}

// Returns the Int32 stored in the flag's environment variable, or
// `default_val` when the variable is unset or does not parse.
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const value_str = getenv(env_var.c_str());
  int32_t value = default_val;
  if (value_str == nullptr ||
      !ParseInt32(std::string("Environment variable ") + env_var, value_str,
                  &value)) {
    return default_val;
  }
  return value;
}

// Returns the double stored in the flag's environment variable, or
// `default_val` when the variable is unset or does not parse.
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const value_str = getenv(env_var.c_str());
  double value = default_val;
  if (value_str == nullptr ||
      !ParseDouble(std::string("Environment variable ") + env_var, value_str,
                   &value)) {
    return default_val;
  }
  return value;
}

// Returns the raw value of the flag's environment variable, or `default_val`
// when it is unset. A non-default result is the pointer returned by getenv().
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const value = getenv(env_var.c_str());
  return value == nullptr ? default_val : value;
}

// Returns the key/value pairs stored in the flag's environment variable, or
// `default_val` when the variable is unset or does not parse.
// NOTE(review): the std::map template arguments appear to have been lost in
// extraction - presumably <std::string, std::string>; confirm.
BENCHMARK_EXPORT
std::map KvPairsFromEnv(
    const char* flag, std::map default_val) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const value_str = getenv(env_var.c_str());

  if (value_str == nullptr) {
    return default_val;
  }

  std::map value;
  if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
    return default_val;
  }
  return value;
}

namespace {

// Parses a string as a command line flag.  The string should have
// the format "--flag=value".  When def_optional is true, the "=value"
// part can be omitted.
//
// Returns the value of the flag, or nullptr if the parsing failed.
const char* ParseFlagValue(const char* str, const char* flag,
                           bool def_optional) {
  // str and flag must not be nullptr.
  if (str == nullptr || flag == nullptr) {
    return nullptr;
  }

  // The flag must start with "--".
const std::string flag_str = std::string("--") + std::string(flag); const size_t flag_len = flag_str.length(); if (strncmp(str, flag_str.c_str(), flag_len) != 0) { return nullptr; } // Skips the flag name. const char* flag_end = str + flag_len; // When def_optional is true, it's OK to not have a "=value" part. if (def_optional && (flag_end[0] == '\0')) { return flag_end; } // If def_optional is true and there are more characters after the // flag name, or if def_optional is false, there must be a '=' after // the flag name. if (flag_end[0] != '=') { return nullptr; } // Returns the string after "=". return flag_end + 1; } } // end namespace BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, true); // Aborts if the parsing failed. if (value_str == nullptr) { return false; } // Converts the string value to a bool. *value = IsTruthyFlagValue(value_str); return true; } BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); // Aborts if the parsing failed. if (value_str == nullptr) { return false; } // Sets *value to the value of the flag. return ParseInt32(std::string("The value of flag --") + flag, value_str, value); } BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); // Aborts if the parsing failed. if (value_str == nullptr) { return false; } // Sets *value to the value of the flag. return ParseDouble(std::string("The value of flag --") + flag, value_str, value); } BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value) { // Gets the value of the flag as a string. 
const char* const value_str = ParseFlagValue(str, flag, false); // Aborts if the parsing failed. if (value_str == nullptr) { return false; } *value = value_str; return true; } BENCHMARK_EXPORT bool ParseKeyValueFlag(const char* str, const char* flag, std::map* value) { const char* const value_str = ParseFlagValue(str, flag, false); if (value_str == nullptr) { return false; } for (const auto& kvpair : StrSplit(value_str, ',')) { const auto kv = StrSplit(kvpair, '='); if (kv.size() != 2) { return false; } value->emplace(kv[0], kv[1]); } return true; } BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag) { return (ParseFlagValue(str, flag, true) != nullptr); } BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value) { if (value.size() == 1) { char v = value[0]; return isalnum(v) && !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); } if (!value.empty()) { std::string value_lower(value); std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), [](char c) { return static_cast(::tolower(c)); }); return !(value_lower == "false" || value_lower == "no" || value_lower == "off"); } return true; } } // end namespace benchmark ================================================ FILE: src/commandlineflags.h ================================================ #ifndef BENCHMARK_COMMANDLINEFLAGS_H_ #define BENCHMARK_COMMANDLINEFLAGS_H_ #include #include #include #include "benchmark/export.h" // Macro for referencing flags. #define FLAG(name) FLAGS_##name // Macros for declaring flags. 
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) #define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name) #define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name) #define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name) #define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name) #define BM_DECLARE_kvpairs(name) \ BENCHMARK_EXPORT extern std::map FLAG(name) // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) // Macros for defining flags. // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) #define BM_DEFINE_bool(name, default_val) \ BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) #define BM_DEFINE_int32(name, default_val) \ BENCHMARK_EXPORT int32_t FLAG(name) = \ benchmark::Int32FromEnv(#name, default_val) #define BM_DEFINE_double(name, default_val) \ BENCHMARK_EXPORT double FLAG(name) = \ benchmark::DoubleFromEnv(#name, default_val) #define BM_DEFINE_string(name, default_val) \ BENCHMARK_EXPORT std::string FLAG(name) = \ benchmark::StringFromEnv(#name, default_val) #define BM_DEFINE_kvpairs(name, default_val) \ BENCHMARK_EXPORT std::map FLAG(name) = \ benchmark::KvPairsFromEnv(#name, default_val) // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) namespace benchmark { // Parses a bool from the environment variable corresponding to the given flag. // // If the variable exists, returns IsTruthyFlagValue() value; if not, // returns the given default value. BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val); // Parses an Int32 from the environment variable corresponding to the given // flag. // // If the variable exists, returns ParseInt32() value; if not, returns // the given default value. BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val); // Parses an Double from the environment variable corresponding to the given // flag. 
// // If the variable exists, returns ParseDouble(); if not, returns // the given default value. BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val); // Parses a string from the environment variable corresponding to the given // flag. // // If variable exists, returns its value; if not, returns // the given default value. BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val); // Parses a set of kvpairs from the environment variable corresponding to the // given flag. // // If variable exists, returns its value; if not, returns // the given default value. BENCHMARK_EXPORT std::map KvPairsFromEnv( const char* flag, std::map default_val); // Parses a string for a bool flag, in the form of either // "--flag=value" or "--flag". // // In the former case, the value is taken as true if it passes IsTruthyValue(). // // In the latter case, the value is taken as true. // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value); // Parses a string for an Int32 flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); // Parses a string for a Double flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value); // Parses a string for a string flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. 
BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value); // Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" // // On success, stores the value of the flag in *value and returns true. On // failure returns false, though *value may have been mutated. BENCHMARK_EXPORT bool ParseKeyValueFlag(const char* str, const char* flag, std::map* value); // Returns true if the string matches the flag. BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag); // Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or // some non-alphanumeric character. Also returns false if the value matches // one of 'no', 'false', 'off' (case-insensitive). As a special case, also // returns true if value is the empty string. BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value); } // end namespace benchmark #endif // BENCHMARK_COMMANDLINEFLAGS_H_ ================================================ FILE: src/complexity.cc ================================================ // Copyright 2016 Ismael Jimenez Martinez. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
// Source project : https://github.com/ismaelJimenez/cpp.leastsq // Adapted to be used with google benchmark #include "complexity.h" #include #include "benchmark/reporter.h" #include "benchmark/statistics.h" #include "benchmark/types.h" #include "check.h" namespace benchmark { namespace { // Internal function to calculate the different scalability forms BigOFunc* FittingCurve(BigO complexity) { switch (complexity) { case oN: return [](IterationCount n) -> double { return static_cast(n); }; case oNSquared: return [](IterationCount n) -> double { return std::pow(n, 2); }; case oNCubed: return [](IterationCount n) -> double { return std::pow(n, 3); }; case oLogN: return [](IterationCount n) -> double { return std::log2(static_cast(n)); }; case oNLogN: return [](IterationCount n) -> double { return static_cast(n) * std::log2(static_cast(n)); }; case o1: default: return [](IterationCount) { return 1.0; }; } } } // end namespace // Function to return an string for the calculated complexity std::string GetBigOString(BigO complexity) { switch (complexity) { case oN: return "N"; case oNSquared: return "N^2"; case oNCubed: return "N^3"; case oLogN: return "lgN"; case oNLogN: return "NlgN"; case o1: return "(1)"; default: return "f(N)"; } } namespace { // Find the coefficient for the high-order term in the running time, by // minimizing the sum of squares of relative error, for the fitting curve // given by the lambda expression. // - n : Vector containing the size of the benchmark tests. // - time : Vector containing the times for the benchmark tests. // - fitting_curve : lambda expression (e.g. [](ComplexityN n) {return n; };). 
// For a deeper explanation on the algorithm logic, please refer to // https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics LeastSq MinimalLeastSq(const std::vector& n, const std::vector& time, BigOFunc* fitting_curve) { double sigma_gn_squared = 0.0; double sigma_time = 0.0; double sigma_time_gn = 0.0; // Calculate least square fitting parameter for (size_t i = 0; i < n.size(); ++i) { double gn_i = fitting_curve(n[i]); sigma_gn_squared += gn_i * gn_i; sigma_time += time[i]; sigma_time_gn += time[i] * gn_i; } LeastSq result; result.complexity = oLambda; // Calculate complexity. result.coef = sigma_time_gn / sigma_gn_squared; // Calculate RMS double rms = 0.0; for (size_t i = 0; i < n.size(); ++i) { double fit = result.coef * fitting_curve(n[i]); rms += std::pow((time[i] - fit), 2); } // Normalized RMS by the mean of the observed values double mean = sigma_time / static_cast(n.size()); result.rms = std::sqrt(rms / static_cast(n.size())) / mean; return result; } // Find the coefficient for the high-order term in the running time, by // minimizing the sum of squares of relative error. // - n : Vector containing the size of the benchmark tests. // - time : Vector containing the times for the benchmark tests. // - complexity : If different than oAuto, the fitting curve will stick to // this one. If it is oAuto, it will be calculated the best // fitting curve. 
LeastSq MinimalLeastSq(const std::vector& n, const std::vector& time, const BigO complexity) { BM_CHECK_EQ(n.size(), time.size()); BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two // benchmark runs are given BM_CHECK_NE(complexity, oNone); LeastSq best_fit; if (complexity == oAuto) { std::vector fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed}; // Take o1 as default best fitting curve best_fit = MinimalLeastSq(n, time, FittingCurve(o1)); best_fit.complexity = o1; // Compute all possible fitting curves and stick to the best one for (const auto& fit : fit_curves) { LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit)); if (current_fit.rms < best_fit.rms) { best_fit = current_fit; best_fit.complexity = fit; } } } else { best_fit = MinimalLeastSq(n, time, FittingCurve(complexity)); best_fit.complexity = complexity; } return best_fit; } } // end namespace std::vector ComputeBigO( const std::vector& reports) { typedef BenchmarkReporter::Run Run; std::vector results; if (reports.size() < 2) { return results; } // Accumulators. std::vector n; std::vector real_time; std::vector cpu_time; // Populate the accumulators. 
for (const Run& run : reports) { BM_CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; n.push_back(run.complexity_n); real_time.push_back(run.real_accumulated_time / static_cast(run.iterations)); cpu_time.push_back(run.cpu_accumulated_time / static_cast(run.iterations)); } LeastSq result_cpu; LeastSq result_real; if (reports[0].complexity == oLambda) { result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); } else { const BigO* InitialBigO = &reports[0].complexity; const bool use_real_time_for_initial_big_o = reports[0].use_real_time_for_initial_big_o; if (use_real_time_for_initial_big_o) { result_real = MinimalLeastSq(n, real_time, *InitialBigO); InitialBigO = &result_real.complexity; // The Big-O complexity for CPU time must have the same Big-O function! } result_cpu = MinimalLeastSq(n, cpu_time, *InitialBigO); InitialBigO = &result_cpu.complexity; if (!use_real_time_for_initial_big_o) { result_real = MinimalLeastSq(n, real_time, *InitialBigO); } } // Drop the 'args' when reporting complexity. auto run_name = reports[0].run_name; run_name.args.clear(); // Get the data from the accumulator to BenchmarkReporter::Run's. Run big_o; big_o.run_name = run_name; big_o.family_index = reports[0].family_index; big_o.per_family_instance_index = reports[0].per_family_instance_index; big_o.run_type = BenchmarkReporter::Run::RT_Aggregate; big_o.repetitions = reports[0].repetitions; big_o.repetition_index = Run::no_repetition_index; big_o.threads = reports[0].threads; big_o.aggregate_name = "BigO"; big_o.aggregate_unit = StatisticUnit::kTime; big_o.report_label = reports[0].report_label; big_o.iterations = 0; big_o.real_accumulated_time = result_real.coef; big_o.cpu_accumulated_time = result_cpu.coef; big_o.report_big_o = true; big_o.complexity = result_cpu.complexity; // All the time results are reported after being multiplied by the // time unit multiplier. 
But since RMS is a relative quantity it // should not be multiplied at all. So, here, we _divide_ it by the // multiplier so that when it is multiplied later the result is the // correct one. double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); // Only add label to mean/stddev if it is same for all runs Run rms; rms.run_name = run_name; rms.family_index = reports[0].family_index; rms.per_family_instance_index = reports[0].per_family_instance_index; rms.run_type = BenchmarkReporter::Run::RT_Aggregate; rms.aggregate_name = "RMS"; rms.aggregate_unit = StatisticUnit::kPercentage; rms.report_label = big_o.report_label; rms.iterations = 0; rms.repetition_index = Run::no_repetition_index; rms.repetitions = reports[0].repetitions; rms.threads = reports[0].threads; rms.real_accumulated_time = result_real.rms / multiplier; rms.cpu_accumulated_time = result_cpu.rms / multiplier; rms.report_rms = true; rms.complexity = result_cpu.complexity; // don't forget to keep the time unit, or we won't be able to // recover the correct value. rms.time_unit = reports[0].time_unit; results.push_back(big_o); results.push_back(rms); return results; } } // end namespace benchmark ================================================ FILE: src/complexity.h ================================================ // Copyright 2016 Ismael Jimenez Martinez. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
// Source project : https://github.com/ismaelJimenez/cpp.leastsq // Adapted to be used with google benchmark #ifndef COMPLEXITY_H_ #define COMPLEXITY_H_ #include #include #include "benchmark/reporter.h" #include "benchmark/statistics.h" namespace benchmark { // Return a vector containing the bigO and RMS information for the specified // list of reports. If 'reports.size() < 2' an empty vector is returned. std::vector ComputeBigO( const std::vector& reports); // This data structure will contain the result returned by MinimalLeastSq // - coef : Estimated coefficient for the high-order term as // interpolated from data. // - rms : Normalized Root Mean Squared Error. // - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability // form has been provided to MinimalLeastSq this will return // the same value. In case BigO::oAuto has been selected, this // parameter will return the best fitting curve detected. struct LeastSq { LeastSq() : coef(0.0), rms(0.0), complexity(oNone) {} double coef; double rms; BigO complexity; }; // Function to return an string for the calculated complexity std::string GetBigOString(BigO complexity); } // end namespace benchmark #endif // COMPLEXITY_H_ ================================================ FILE: src/console_reporter.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include #include #include #include #include #include #include #include #include "benchmark/export.h" #include "benchmark/reporter.h" #include "benchmark/types.h" #include "check.h" #include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" #include "counter.h" #include "internal_macros.h" #include "string_util.h" #include "timers.h" namespace benchmark { BENCHMARK_EXPORT bool ConsoleReporter::ReportContext(const Context& context) { name_field_width_ = context.name_field_width; printed_header_ = false; prev_counters_.clear(); PrintBasicContext(&GetErrorStream(), context); #ifdef BENCHMARK_OS_WINDOWS if ((output_options_ & OO_Color)) { auto stdOutBuf = std::cout.rdbuf(); auto outStreamBuf = GetOutputStream().rdbuf(); if (stdOutBuf != outStreamBuf) { GetErrorStream() << "Color printing is only supported for stdout on windows." " Disabling color printing\n"; output_options_ = static_cast(output_options_ & ~OO_Color); } } #endif return true; } BENCHMARK_EXPORT void ConsoleReporter::PrintHeader(const Run& run) { std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), "Benchmark", "Time", "CPU", "Iterations"); if (!run.counters.empty()) { if ((output_options_ & OO_Tabular) != 0) { for (auto const& c : run.counters) { str += FormatString(" %10s", c.first.c_str()); } } else { str += " UserCounters..."; } } std::string line = std::string(str.length(), '-'); GetOutputStream() << line << "\n" << str << "\n" << line << "\n"; } BENCHMARK_EXPORT void ConsoleReporter::ReportRuns(const std::vector& reports) { for (const auto& run : reports) { // print the header: // --- if none was printed yet bool print_header = !printed_header_; // --- or if the format is tabular and this run // has different fields from the prev header print_header |= ((output_options_ & OO_Tabular) != 0) && (!internal::SameNames(run.counters, prev_counters_)); if (print_header) { printed_header_ = true; prev_counters_ = run.counters; PrintHeader(run); } // As an 
alternative to printing the headers like this, we could sort // the benchmarks by header and then print. But this would require // waiting for the full results before printing, or printing twice. PrintRunData(run); } } PRINTF_FORMAT_STRING_FUNC(3, 4) static void IgnoreColorPrint(std::ostream& out, LogColor /*unused*/, const char* fmt, ...) { va_list args; va_start(args, fmt); out << FormatString(fmt, args); va_end(args); } static std::string FormatTime(double time) { // For the time columns of the console printer 13 digits are reserved. One of // them is a space and max two of them are the time unit (e.g ns). That puts // us at 10 digits usable for the number. // Align decimal places... if (time < 1.0) { return FormatString("%10.3f", time); } if (time < 10.0) { return FormatString("%10.2f", time); } if (time < 100.0) { return FormatString("%10.1f", time); } // Assuming the time is at max 9.9999e+99 and we have 10 digits for the // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print. if (time > 9999999999 /*max 10 digit number*/) { return FormatString("%1.4e", time); } return FormatString("%10.0f", time); } BENCHMARK_EXPORT void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); PrinterFn* printer = (output_options_ & OO_Color) != 0 ? static_cast(ColorPrintf) : IgnoreColorPrint; auto name_color = (result.report_big_o || result.report_rms) ? 
COLOR_BLUE : COLOR_GREEN; printer(Out, name_color, "%-*s ", static_cast(name_field_width_), result.benchmark_name().c_str()); if (internal::SkippedWithError == result.skipped) { printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", result.skip_message.c_str()); printer(Out, COLOR_DEFAULT, "\n"); return; } if (internal::SkippedWithMessage == result.skipped) { printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str()); printer(Out, COLOR_DEFAULT, "\n"); return; } const double real_time = result.GetAdjustedRealTime(); const double cpu_time = result.GetAdjustedCPUTime(); const std::string real_time_str = FormatTime(real_time); const std::string cpu_time_str = FormatTime(cpu_time); if (result.report_big_o) { std::string big_o = GetBigOString(result.complexity); printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(), cpu_time, big_o.c_str()); } else if (result.report_rms) { printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", cpu_time * 100, "%"); } else if (result.run_type != Run::RT_Aggregate || result.aggregate_unit == StatisticUnit::kTime) { const char* timeLabel = GetTimeUnitString(result.time_unit); printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, cpu_time_str.c_str(), timeLabel); } else { assert(result.aggregate_unit == StatisticUnit::kPercentage); printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", (100. * result.real_accumulated_time), "%", (100. * result.cpu_accumulated_time), "%"); } if (!result.report_big_o && !result.report_rms) { printer(Out, COLOR_CYAN, "%10lld", result.iterations); } for (const auto& c : result.counters) { const std::size_t cNameLen = std::max(static_cast(10), c.first.length()); std::string s; const char* unit = ""; if (result.run_type == Run::RT_Aggregate && result.aggregate_unit == StatisticUnit::kPercentage) { s = StrFormat("%.2f", 100. 
* c.second.value); unit = "%"; } else { s = HumanReadableNumber(c.second.value, c.second.oneK); if ((c.second.flags & Counter::kIsRate) != 0) { unit = (c.second.flags & Counter::kInvert) != 0 ? "s" : "/s"; } } if ((output_options_ & OO_Tabular) != 0) { printer(Out, COLOR_DEFAULT, " %*s%s", static_cast(cNameLen - strlen(unit)), s.c_str(), unit); } else { printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit); } } if (!result.report_label.empty()) { printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str()); } printer(Out, COLOR_DEFAULT, "\n"); } } // end namespace benchmark ================================================ FILE: src/counter.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "counter.h" namespace benchmark { namespace internal { namespace { double Finish(Counter const& c, IterationCount iterations, double cpu_time, double num_threads) { double v = c.value; if ((c.flags & Counter::kIsRate) != 0) { v /= cpu_time; } if ((c.flags & Counter::kAvgThreads) != 0) { v /= num_threads; } if ((c.flags & Counter::kIsIterationInvariant) != 0) { v *= static_cast(iterations); } if ((c.flags & Counter::kAvgIterations) != 0) { v /= static_cast(iterations); } if ((c.flags & Counter::kInvert) != 0) { // Invert is *always* last. 
v = 1.0 / v; } return v; } } // namespace void Finish(UserCounters* l, IterationCount iterations, double cpu_time, double num_threads) { for (auto& c : *l) { c.second.value = Finish(c.second, iterations, cpu_time, num_threads); } } void Increment(UserCounters* l, UserCounters const& r) { // add counters present in both or just in *l for (auto& c : *l) { auto it = r.find(c.first); if (it != r.end()) { c.second.value = c.second + it->second; } } // add counters present in r, but not in *l for (auto const& tc : r) { auto it = l->find(tc.first); if (it == l->end()) { (*l)[tc.first] = tc.second; } } } bool SameNames(UserCounters const& l, UserCounters const& r) { if (&l == &r) { return true; } if (l.size() != r.size()) { return false; } for (auto const& c : l) { if (r.find(c.first) == r.end()) { return false; } } return true; } } // end namespace internal } // end namespace benchmark ================================================ FILE: src/counter.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_SRC_COUNTER_H_ #define BENCHMARK_SRC_COUNTER_H_ #include "benchmark/counter.h" #include "benchmark/export.h" #include "benchmark/types.h" namespace benchmark { namespace internal { void Finish(UserCounters* l, IterationCount iterations, double time, double num_threads); void Increment(UserCounters* l, UserCounters const& r); bool SameNames(UserCounters const& l, UserCounters const& r); } // end namespace internal } // end namespace benchmark #endif // BENCHMARK_SRC_COUNTER_H_ ================================================ FILE: src/csv_reporter.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include "benchmark/export.h" #include "benchmark/reporter.h" #include "check.h" #include "complexity.h" // File format reference: http://edoceo.com/utilitas/csv-file-format. 
namespace benchmark { namespace { const std::vector elements = { "name", "iterations", "real_time", "cpu_time", "time_unit", "bytes_per_second", "items_per_second", "label", "error_occurred", "error_message"}; std::string CsvEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size() + 2); for (char c : s) { switch (c) { case '"': tmp += "\"\""; break; default: tmp += c; break; } } return '"' + tmp + '"'; } } // namespace BENCHMARK_EXPORT bool CSVReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); return true; } BENCHMARK_EXPORT void CSVReporter::ReportRuns(const std::vector& reports) { std::ostream& Out = GetOutputStream(); if (!printed_header_) { // save the names of all the user counters for (const auto& run : reports) { for (const auto& cnt : run.counters) { if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") { continue; } user_counter_names_.insert(cnt.first); } } // print the header for (auto B = elements.begin(); B != elements.end();) { Out << *B++; if (B != elements.end()) { Out << ","; } } for (auto B = user_counter_names_.begin(); B != user_counter_names_.end();) { Out << ",\"" << *B++ << "\""; } Out << "\n"; printed_header_ = true; } else { // check that all the current counters are saved in the name set for (const auto& run : reports) { for (const auto& cnt : run.counters) { if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") { continue; } BM_CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) << "All counters must be present in each run. 
" << "Counter named \"" << cnt.first << "\" was not in a run after being added to the header"; } } } // print results for each run for (const auto& run : reports) { PrintRunData(run); } } BENCHMARK_EXPORT void CSVReporter::PrintRunData(const Run& run) { std::ostream& Out = GetOutputStream(); Out << CsvEscape(run.benchmark_name()) << ","; if (run.skipped != 0u) { Out << std::string(elements.size() - 3, ','); Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ","; Out << CsvEscape(run.skip_message) << "\n"; return; } // Do not print iteration on bigO and RMS report if (!run.report_big_o && !run.report_rms) { Out << run.iterations; } Out << ","; if (run.run_type != Run::RT_Aggregate || run.aggregate_unit == StatisticUnit::kTime) { Out << run.GetAdjustedRealTime() << ","; Out << run.GetAdjustedCPUTime() << ","; } else { assert(run.aggregate_unit == StatisticUnit::kPercentage); Out << run.real_accumulated_time << ","; Out << run.cpu_accumulated_time << ","; } // Do not print timeLabel on bigO and RMS report if (run.report_big_o) { Out << GetBigOString(run.complexity); } else if (!run.report_rms && run.aggregate_unit != StatisticUnit::kPercentage) { Out << GetTimeUnitString(run.time_unit); } Out << ","; if (run.counters.find("bytes_per_second") != run.counters.end()) { Out << run.counters.at("bytes_per_second"); } Out << ","; if (run.counters.find("items_per_second") != run.counters.end()) { Out << run.counters.at("items_per_second"); } Out << ","; if (!run.report_label.empty()) { Out << CsvEscape(run.report_label); } Out << ",,"; // for error_occurred and error_message // Print user counters for (const auto& ucn : user_counter_names_) { auto it = run.counters.find(ucn); if (it == run.counters.end()) { Out << ","; } else { Out << "," << it->second; } } Out << '\n'; } } // end namespace benchmark ================================================ FILE: src/cycleclock.h ================================================ // 
---------------------------------------------------------------------- // CycleClock // A CycleClock tells you the current time in Cycles. The "time" // is actually time since power-on. This is like time() but doesn't // involve a system call and is much more precise. // // NOTE: Not all cpu/platform/kernel combinations guarantee that this // clock increments at a constant rate or is synchronized across all logical // cpus in a system. // // If you need the above guarantees, please consider using a different // API. There are efforts to provide an interface which provides a millisecond // granularity and implemented as a memory read. A memory read is generally // cheaper than the CycleClock for many architectures. // // Also, in some out of order CPU implementations, the CycleClock is not // serializing. So if you're trying to count at cycles granularity, your // data might be inaccurate due to out of order instruction execution. // ---------------------------------------------------------------------- #ifndef BENCHMARK_CYCLECLOCK_H_ #define BENCHMARK_CYCLECLOCK_H_ #include #include "benchmark/macros.h" #include "internal_macros.h" #if defined(BENCHMARK_OS_MACOSX) #include #endif // For MSVC, we want to use '_asm rdtsc' when possible (since it works // with even ancient MSVC compilers), and when not possible the // __rdtsc intrinsic, declared in . Unfortunately, in some // environments, and have conflicting // declarations of some other intrinsics, breaking compilation. // Therefore, we simply declare __rdtsc ourselves. See also // http://connect.microsoft.com/VisualStudio/feedback/details/262047 // // Note that MSVC defines the x64 preprocessor macros when building // for Arm64EC, despite it using Arm64 assembly instructions. 
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \ !defined(_M_ARM64EC) extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif #if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW) #include #include #endif #ifdef BENCHMARK_OS_EMSCRIPTEN #include #endif namespace benchmark { // NOTE: only i386 and x86_64 have been well tested. // PPC, sparc, alpha, and ia64 are based on // http://peter.kuscsik.com/wordpress/?p=14 // with modifications by m3b. See also // https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h namespace cycleclock { // This should return the number of cycles since power-on. Thread-safe. inline BENCHMARK_ALWAYS_INLINE int64_t Now() { #if defined(BENCHMARK_OS_MACOSX) // this goes at the top because we need ALL Macs, regardless of // architecture, to return the number of "mach time units" that // have passed since startup. See sysinfo.cc where // InitializeSystemInfo() sets the supposed cpu clock frequency of // macs to the number of mach time units per second, not actual // CPU clock frequency (which can change in the face of CPU // frequency scaling). Also note that when the Mac sleeps, this // counter pauses; it does not continue counting, nor does it // reset to zero. return static_cast(mach_absolute_time()); #elif defined(BENCHMARK_OS_EMSCRIPTEN) // this goes above x86-specific code because old versions of Emscripten // define __x86_64__, although they have nothing to do with it. return static_cast(emscripten_get_now() * 1e+6); #elif defined(__i386__) int64_t ret; __asm__ volatile("rdtsc" : "=A"(ret)); return ret; // Note that Clang, like MSVC, defines the x64 preprocessor macros when building // for Arm64EC, despite it using Arm64 assembly instructions. 
#elif (defined(__x86_64__) || defined(__amd64__)) && !defined(__arm64ec__) uint64_t low, high; __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); return static_cast((high << 32) | low); #elif defined(__powerpc__) || defined(__ppc__) // This returns a time-base, which is not always precisely a cycle-count. #if defined(__powerpc64__) || defined(__ppc64__) int64_t tb; asm volatile("mfspr %0, 268" : "=r"(tb)); return tb; #else uint32_t tbl, tbu0, tbu1; asm volatile( "mftbu %0\n" "mftb %1\n" "mftbu %2" : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); tbl &= -static_cast(tbu0 == tbu1); // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed) return (static_cast(tbu1) << 32) | tbl; #endif #elif defined(__sparc__) int64_t tick; asm(".byte 0x83, 0x41, 0x00, 0x00"); asm("mov %%g1, %0" : "=r"(tick)); return tick; #elif defined(__ia64__) int64_t itc; asm("mov %0 = ar.itc" : "=r"(itc)); return itc; #elif defined(COMPILER_MSVC) && defined(_M_IX86) // Older MSVC compilers (like 7.x) don't seem to support the // __rdtsc intrinsic properly, so I prefer to use _asm instead // when I know it will work. Otherwise, I'll use __rdtsc and hope // the code is being compiled with a non-ancient compiler. _asm rdtsc #elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC)) // See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics // and https://reviews.llvm.org/D53115 int64_t virtual_timer_value; virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); return virtual_timer_value; #elif defined(COMPILER_MSVC) return __rdtsc(); #elif defined(BENCHMARK_OS_NACL) // Native Client validator on x86/x86-64 allows RDTSC instructions, // and this case is handled above. Native Client validator on ARM // rejects MRC instructions (used in the ARM-specific sequence below), // so we handle it here. Portable Native Client compiles to // architecture-agnostic bytecode, which doesn't provide any // cycle counter access mnemonics. 
// Native Client does not provide any API to access cycle counter. // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday // because is provides nanosecond resolution (which is noticeable at // least for PNaCl modules running on x86 Mac & Linux). // Initialize to always return 0 if clock_gettime fails. struct timespec ts = {0, 0}; clock_gettime(CLOCK_MONOTONIC, &ts); return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; #elif defined(__aarch64__) || defined(__arm64ec__) // System timer of ARMv8 runs at a different frequency than the CPU's. // The frequency is fixed, typically in the range 1-50MHz. It can be // read at CNTFRQ special register. We assume the OS has set up // the virtual timer properly. int64_t virtual_timer_value; asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); return virtual_timer_value; #elif defined(__ARM_ARCH) // V6 is the earliest arch that has a standard cyclecount // Native Client validator doesn't allow MRC instructions. #if (__ARM_ARCH >= 6) uint32_t pmccntr; uint32_t pmuseren; uint32_t pmcntenset; // Read the user mode perf monitor counter access permissions. asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); if (pmcntenset & 0x80000000ul) { // Is it counting? asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); // The counter is set up to count every 64th cycle return static_cast(pmccntr) * 64; // Should optimize to << 6 } } #endif struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__mips__) || defined(__m68k__) // mips apparently only allows rdtsc for superusers, so we fall // back to gettimeofday. It's possible clock_gettime would be better. 
struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__loongarch__) || defined(__csky__) struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__s390__) // Covers both s390 and s390x. // Return the CPU clock. uint64_t tsc; #if defined(BENCHMARK_OS_ZOS) // z/OS HLASM syntax. asm(" stck %0" : "=m"(tsc) : : "cc"); #else // Linux on Z syntax. asm("stck %0" : "=Q"(tsc) : : "cc"); #endif return tsc; #elif defined(__riscv) // RISC-V // Use RDTIME (and RDTIMEH on riscv32). // RDCYCLE is a privileged instruction since Linux 6.6. #if __riscv_xlen == 32 uint32_t cycles_lo, cycles_hi0, cycles_hi1; // This asm also includes the PowerPC overflow handling strategy, as above. // Implemented in assembly because Clang insisted on branching. asm volatile( "rdtimeh %0\n" "rdtime %1\n" "rdtimeh %2\n" "sub %0, %0, %2\n" "seqz %0, %0\n" "sub %0, zero, %0\n" "and %1, %1, %0\n" : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); return static_cast((static_cast(cycles_hi1) << 32) | cycles_lo); #else uint64_t cycles; asm volatile("rdtime %0" : "=r"(cycles)); return static_cast(cycles); #endif #elif defined(__e2k__) || defined(__elbrus__) struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__hexagon__) uint64_t pcycle; asm volatile("%0 = C15:14" : "=r"(pcycle)); return static_cast(pcycle); #elif defined(__alpha__) // Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit // integer and thus wraps every ~4s, making using it for tick counts // unreliable beyond this time range. The real-time clock is low-precision, // roughtly ~1ms, but it is the only option that can reasonable count // indefinitely. 
struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__hppa__) || defined(__linux__) || defined(BENCHMARK_OS_WASI) // Fallback for all other architectures with a recent Linux kernel, e.g.: // HP PA-RISC provides a user-readable clock counter (cr16), but // it's not syncronized across CPUs and only 32-bit wide when programs // are built as 32-bit binaries. // Same for SH-4 and possibly others. // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday // because is provides nanosecond resolution. // Initialize to always return 0 if clock_gettime fails. struct timespec ts = {0, 0}; clock_gettime(CLOCK_MONOTONIC, &ts); return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; #else // The soft failover to a generic implementation is automatic only for ARM. // For other platforms the developer is expected to make an attempt to create // a fast implementation and use generic version if nothing better is // available. #error You need to define CycleTimer for your OS and CPU #endif } } // end namespace cycleclock } // end namespace benchmark #endif // BENCHMARK_CYCLECLOCK_H_ ================================================ FILE: src/internal_macros.h ================================================ #ifndef BENCHMARK_INTERNAL_MACROS_H_ #define BENCHMARK_INTERNAL_MACROS_H_ /* Needed to detect STL */ #include // clang-format off #ifndef __has_feature #define __has_feature(x) 0 #endif #if defined(__clang__) #if !defined(COMPILER_CLANG) #define COMPILER_CLANG #endif #elif defined(_MSC_VER) #if !defined(COMPILER_MSVC) #define COMPILER_MSVC #endif #elif defined(__GNUC__) #if !defined(COMPILER_GCC) #define COMPILER_GCC #endif #endif #if __has_feature(cxx_attributes) #define BENCHMARK_NORETURN [[noreturn]] #elif defined(__GNUC__) #define BENCHMARK_NORETURN __attribute__((noreturn)) #elif defined(COMPILER_MSVC) #define BENCHMARK_NORETURN __declspec(noreturn) #else #define BENCHMARK_NORETURN #endif #if 
defined(__CYGWIN__) #define BENCHMARK_OS_CYGWIN 1 #elif defined(_WIN32) #define BENCHMARK_OS_WINDOWS 1 // WINAPI_FAMILY_PARTITION is defined in winapifamily.h. // We include windows.h which implicitly includes winapifamily.h for compatibility. #ifndef NOMINMAX #define NOMINMAX #endif #include #if defined(WINAPI_FAMILY_PARTITION) #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) #define BENCHMARK_OS_WINDOWS_WIN32 1 #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) #define BENCHMARK_OS_WINDOWS_RT 1 #endif #endif #if defined(__MINGW32__) #define BENCHMARK_OS_MINGW 1 #endif #elif defined(__APPLE__) #define BENCHMARK_OS_APPLE 1 #include "TargetConditionals.h" #if defined(TARGET_OS_MAC) #define BENCHMARK_OS_MACOSX 1 #if defined(TARGET_OS_IPHONE) #define BENCHMARK_OS_IOS 1 #endif #endif #elif defined(__FreeBSD__) #define BENCHMARK_OS_FREEBSD 1 #elif defined(__NetBSD__) #define BENCHMARK_OS_NETBSD 1 #elif defined(__OpenBSD__) #define BENCHMARK_OS_OPENBSD 1 #elif defined(__DragonFly__) #define BENCHMARK_OS_DRAGONFLY 1 #elif defined(__linux__) #define BENCHMARK_OS_LINUX 1 #elif defined(__native_client__) #define BENCHMARK_OS_NACL 1 #elif defined(__EMSCRIPTEN__) #define BENCHMARK_OS_EMSCRIPTEN 1 #elif defined(__wasi__) #define BENCHMARK_OS_WASI 1 #elif defined(__rtems__) #define BENCHMARK_OS_RTEMS 1 #elif defined(__Fuchsia__) #define BENCHMARK_OS_FUCHSIA 1 #elif defined (__SVR4) && defined (__sun) #define BENCHMARK_OS_SOLARIS 1 #elif defined(__QNX__) #define BENCHMARK_OS_QNX 1 #elif defined(__MVS__) #define BENCHMARK_OS_ZOS 1 #elif defined(__hexagon__) #define BENCHMARK_OS_QURT 1 #endif #if defined(__ANDROID__) && defined(__GLIBCXX__) #define BENCHMARK_STL_ANDROID_GNUSTL 1 #endif #if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \ && !defined(__EXCEPTIONS) #define BENCHMARK_HAS_NO_EXCEPTIONS #endif #if defined(COMPILER_CLANG) || defined(COMPILER_GCC) #define BENCHMARK_MAYBE_UNUSED __attribute__((unused)) #else #define BENCHMARK_MAYBE_UNUSED #endif #if 
defined(__GNUC__) || defined(__clang__) #define PRINTF_FORMAT_STRING_FUNC(format_arg, first_idx) \ __attribute__((format(printf, format_arg, first_idx))) #elif defined(__MINGW32__) #define PRINTF_FORMAT_STRING_FUNC(format_arg, first_idx) \ __attribute__((format(__MINGW_PRINTF_FORMAT, format_arg, first_idx))) #else #define PRINTF_FORMAT_STRING_FUNC(format_arg, first_idx) #endif // clang-format on #endif // BENCHMARK_INTERNAL_MACROS_H_ ================================================ FILE: src/json_reporter.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include #include #include #include // for setprecision #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/export.h" #include "benchmark/reporter.h" #include "benchmark/types.h" #include "complexity.h" #include "string_util.h" #include "timers.h" namespace benchmark { namespace { std::string StrEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size()); for (char c : s) { switch (c) { case '\b': tmp += "\\b"; break; case '\f': tmp += "\\f"; break; case '\n': tmp += "\\n"; break; case '\r': tmp += "\\r"; break; case '\t': tmp += "\\t"; break; case '\\': tmp += "\\\\"; break; case '"': tmp += "\\\""; break; default: tmp += c; break; } } return tmp; } std::string FormatKV(std::string const& key, std::string const& value) { return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, const char* value) { return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, bool value) { return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); } std::string FormatKV(std::string const& key, int64_t value) { std::stringstream ss; // We really want to just dump the integer as-is, // without the system locale interfering. ss << '"' << StrEscape(key) << "\": " << std::to_string(value); return ss.str(); } std::string FormatKV(std::string const& key, int value) { return FormatKV(key, static_cast(value)); } std::string FormatKV(std::string const& key, double value) { std::stringstream ss; ss << '"' << StrEscape(key) << "\": "; if (std::isnan(value)) { ss << (value < 0 ? "-" : "") << "NaN"; } else if (std::isinf(value)) { ss << (value < 0 ? 
"-" : "") << "Infinity"; } else { const auto max_digits10 = std::numeric_limits::max_digits10; const auto max_fractional_digits10 = max_digits10 - 1; ss << std::scientific << std::setprecision(max_fractional_digits10) << value; } return ss.str(); } int64_t RoundDouble(double v) { return std::lround(v); } } // end namespace bool JSONReporter::ReportContext(const Context& context) { std::ostream& out = GetOutputStream(); out << "{\n"; std::string inner_indent(2, ' '); // Open context block and print context information. out << inner_indent << "\"context\": {\n"; std::string indent(4, ' '); std::string walltime_value = LocalDateTimeString(); out << indent << FormatKV("date", walltime_value) << ",\n"; out << indent << FormatKV("host_name", context.sys_info.name) << ",\n"; if (Context::executable_name != nullptr) { out << indent << FormatKV("executable", Context::executable_name) << ",\n"; } CPUInfo const& info = context.cpu_info; out << indent << FormatKV("num_cpus", static_cast(info.num_cpus)) << ",\n"; out << indent << FormatKV("mhz_per_cpu", RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; if (CPUInfo::Scaling::UNKNOWN != info.scaling) { out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED) << ",\n"; } const SystemInfo& sysinfo = context.sys_info; if (SystemInfo::ASLR::UNKNOWN != sysinfo.ASLRStatus) { out << indent << FormatKV("aslr_enabled", sysinfo.ASLRStatus == SystemInfo::ASLR::ENABLED) << ",\n"; } out << indent << "\"caches\": [\n"; indent = std::string(6, ' '); std::string cache_indent(8, ' '); for (size_t i = 0; i < info.caches.size(); ++i) { const auto& CI = info.caches[i]; out << indent << "{\n"; out << cache_indent << FormatKV("type", CI.type) << ",\n"; out << cache_indent << FormatKV("level", static_cast(CI.level)) << ",\n"; out << cache_indent << FormatKV("size", static_cast(CI.size)) << ",\n"; out << cache_indent << FormatKV("num_sharing", static_cast(CI.num_sharing)) << "\n"; out << indent << "}"; if (i 
!= info.caches.size() - 1) { out << ","; } out << "\n"; } indent = std::string(4, ' '); out << indent << "],\n"; out << indent << "\"load_avg\": ["; for (auto it = info.load_avg.begin(); it != info.load_avg.end();) { out << *it++; if (it != info.load_avg.end()) { out << ","; } } out << "],\n"; out << indent << FormatKV("library_version", GetBenchmarkVersion()); out << ",\n"; #if defined(NDEBUG) const char build_type[] = "release"; #else const char build_type[] = "debug"; #endif out << indent << FormatKV("library_build_type", build_type); out << ",\n"; // NOTE: our json schema is not strictly tied to the library version! out << indent << FormatKV("json_schema_version", 1); std::map* global_context = internal::GetGlobalContext(); if (global_context != nullptr) { for (const auto& kv : *global_context) { out << ",\n"; out << indent << FormatKV(kv.first, kv.second); } } out << "\n"; // Close context block and open the list of benchmarks. out << inner_indent << "},\n"; out << inner_indent << "\"benchmarks\": [\n"; return true; } void JSONReporter::ReportRuns(std::vector const& reports) { if (reports.empty()) { return; } std::string indent(4, ' '); std::ostream& out = GetOutputStream(); if (!first_report_) { out << ",\n"; } first_report_ = false; for (auto it = reports.begin(); it != reports.end(); ++it) { out << indent << "{\n"; PrintRunData(*it); out << indent << '}'; auto it_cp = it; if (++it_cp != reports.end()) { out << ",\n"; } } } void JSONReporter::Finalize() { // Close the list of benchmarks and the top level object. 
GetOutputStream() << "\n ]\n}\n"; } void JSONReporter::PrintRunData(Run const& run) { std::string indent(6, ' '); std::ostream& out = GetOutputStream(); out << indent << FormatKV("name", run.benchmark_name()) << ",\n"; out << indent << FormatKV("family_index", run.family_index) << ",\n"; out << indent << FormatKV("per_family_instance_index", run.per_family_instance_index) << ",\n"; out << indent << FormatKV("run_name", run.run_name.str()) << ",\n"; out << indent << FormatKV("run_type", [&run]() -> const char* { switch (run.run_type) { case BenchmarkReporter::Run::RT_Iteration: return "iteration"; case BenchmarkReporter::Run::RT_Aggregate: return "aggregate"; } BENCHMARK_UNREACHABLE(); }()) << ",\n"; out << indent << FormatKV("repetitions", run.repetitions) << ",\n"; if (run.run_type != BenchmarkReporter::Run::RT_Aggregate) { out << indent << FormatKV("repetition_index", run.repetition_index) << ",\n"; } out << indent << FormatKV("threads", run.threads) << ",\n"; if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; out << indent << FormatKV("aggregate_unit", [&run]() -> const char* { switch (run.aggregate_unit) { case StatisticUnit::kTime: return "time"; case StatisticUnit::kPercentage: return "percentage"; } BENCHMARK_UNREACHABLE(); }()) << ",\n"; } if (internal::SkippedWithError == run.skipped) { out << indent << FormatKV("error_occurred", true) << ",\n"; out << indent << FormatKV("error_message", run.skip_message) << ",\n"; } else if (internal::SkippedWithMessage == run.skipped) { out << indent << FormatKV("skipped", true) << ",\n"; out << indent << FormatKV("skip_message", run.skip_message) << ",\n"; } if (!run.report_big_o && !run.report_rms) { out << indent << FormatKV("iterations", run.iterations) << ",\n"; if (run.run_type != Run::RT_Aggregate || run.aggregate_unit == StatisticUnit::kTime) { out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; out << 
indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); } else { assert(run.aggregate_unit == StatisticUnit::kPercentage); out << indent << FormatKV("real_time", run.real_accumulated_time) << ",\n"; out << indent << FormatKV("cpu_time", run.cpu_accumulated_time); } out << ",\n" << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); } else if (run.report_big_o) { out << indent << FormatKV("cpu_coefficient", run.GetAdjustedCPUTime()) << ",\n"; out << indent << FormatKV("real_coefficient", run.GetAdjustedRealTime()) << ",\n"; out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n"; out << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); } else if (run.report_rms) { out << indent << FormatKV("rms", run.GetAdjustedCPUTime()); } for (const auto& c : run.counters) { out << ",\n" << indent << FormatKV(c.first, c.second); } if (run.memory_result.memory_iterations > 0) { const auto& memory_result = run.memory_result; out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter); out << ",\n" << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used); auto report_if_present = [&out, &indent](const std::string& label, int64_t val) { if (val != MemoryManager::TombstoneValue) { out << ",\n" << indent << FormatKV(label, val); } }; report_if_present("total_allocated_bytes", memory_result.total_allocated_bytes); report_if_present("net_heap_growth", memory_result.net_heap_growth); } if (!run.report_label.empty()) { out << ",\n" << indent << FormatKV("label", run.report_label); } out << '\n'; } } // end namespace benchmark ================================================ FILE: src/log.h ================================================ #ifndef BENCHMARK_LOG_H_ #define BENCHMARK_LOG_H_ #include #include namespace benchmark { namespace internal { typedef std::basic_ostream&(EndLType)(std::basic_ostream&); class LogType { friend LogType& GetNullLogInstance(); friend LogType& GetErrorLogInstance(); // FIXME: Add 
locking to output. template friend LogType& operator<<(LogType&, Tp const&); friend LogType& operator<<(LogType&, EndLType*); private: LogType(std::ostream* out) : out_(out) {} std::ostream* out_; // NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have // a dependency on benchmark.h from here. LogType(const LogType&) = delete; LogType& operator=(const LogType&) = delete; }; template LogType& operator<<(LogType& log, Tp const& value) { if (log.out_) { *log.out_ << value; } return log; } inline LogType& operator<<(LogType& log, EndLType* m) { if (log.out_) { *log.out_ << m; } return log; } inline int& LogLevel() { static int log_level = 0; return log_level; } inline LogType& GetNullLogInstance() { static LogType null_log(static_cast(nullptr)); return null_log; } inline LogType& GetErrorLogInstance() { static LogType error_log(&std::clog); return error_log; } inline LogType& GetLogInstanceForLevel(int level) { if (level <= LogLevel()) { return GetErrorLogInstance(); } return GetNullLogInstance(); } } // end namespace internal } // end namespace benchmark // clang-format off #define BM_VLOG(x) \ (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ " ") // clang-format on #endif ================================================ FILE: src/mutex.h ================================================ #ifndef BENCHMARK_MUTEX_H_ #define BENCHMARK_MUTEX_H_ #include #include #include "check.h" // Enable thread safety attributes only with clang. // The attributes can be safely erased when compiling with other compilers. 
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) #define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) #else #define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op #endif #define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) #define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) #define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) #define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) #define ACQUIRED_BEFORE(...) \ THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) #define ACQUIRED_AFTER(...) \ THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) #define REQUIRES(...) \ THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) #define REQUIRES_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) #define ACQUIRE(...) \ THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) #define ACQUIRE_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) #define RELEASE(...) \ THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) #define RELEASE_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) #define TRY_ACQUIRE(...) \ THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) #define TRY_ACQUIRE_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) #define EXCLUDES(...) 
THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) #define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) #define ASSERT_SHARED_CAPABILITY(x) \ THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) #define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) #define NO_THREAD_SAFETY_ANALYSIS \ THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) namespace benchmark { typedef std::condition_variable Condition; // NOTE: Wrappers for std::mutex and std::unique_lock are provided so that // we can annotate them with thread safety attributes and use the // -Wthread-safety warning with clang. The standard library types cannot be // used directly because they do not provide the required annotations. class CAPABILITY("mutex") Mutex { public: Mutex() {} void lock() ACQUIRE() { mut_.lock(); } void unlock() RELEASE() { mut_.unlock(); } std::mutex& native_handle() { return mut_; } private: std::mutex mut_; }; class SCOPED_CAPABILITY MutexLock { typedef std::unique_lock MutexLockImp; public: MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) {} ~MutexLock() RELEASE() {} MutexLockImp& native_handle() { return ml_; } private: MutexLockImp ml_; }; class Barrier { public: Barrier(int num_threads) : running_threads_(num_threads) {} // Called by each thread bool wait() EXCLUDES(lock_) { bool last_thread = false; { MutexLock ml(lock_); last_thread = createBarrier(ml); } if (last_thread) phase_condition_.notify_all(); return last_thread; } void removeThread() EXCLUDES(lock_) { MutexLock ml(lock_); --running_threads_; if (entered_ != 0) phase_condition_.notify_all(); } private: Mutex lock_; Condition phase_condition_; int running_threads_; // State for barrier management int phase_number_ = 0; int entered_ = 0; // Number of threads that have entered this barrier // Enter the barrier and wait until all other threads have also // entered the barrier. Returns iff this is the last thread to // enter the barrier. 
bool createBarrier(MutexLock& ml) REQUIRES(lock_) { BM_CHECK_LT(entered_, running_threads_); entered_++; if (entered_ < running_threads_) { // Wait for all threads to enter int phase_number_cp = phase_number_; auto cb = [this, phase_number_cp]() { return this->phase_number_ > phase_number_cp || entered_ == running_threads_; // A thread has aborted in error }; phase_condition_.wait(ml.native_handle(), cb); if (phase_number_ > phase_number_cp) return false; // else (running_threads_ == entered_) and we are the last thread. } // Last thread has reached the barrier phase_number_++; entered_ = 0; return true; } }; } // end namespace benchmark #endif // BENCHMARK_MUTEX_H_ ================================================ FILE: src/perf_counters.cc ================================================ // Copyright 2021 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "perf_counters.h"

#include <cstring>
#include <memory>
#include <string>
#include <vector>

#if defined HAVE_LIBPFM
#include <dirent.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cerrno>
#include <cinttypes>
#include <cstdio>
#include <optional>
#include <string_view>

#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif

namespace benchmark {
namespace internal {

#if defined HAVE_LIBPFM

// Reads every counter group in `leaders` into values_, compacting the data so
// that the per-group leading 64-bit word is dropped and counter values end up
// contiguous from index 0. Returns the number of counter values read, or 0 if
// any read fails (the error is logged).
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
  // Create a pointer for multiple reads
  const size_t bufsize = values_.size() * sizeof(values_[0]);
  char* ptr = reinterpret_cast<char*>(values_.data());
  size_t size = bufsize;
  for (int lead : leaders) {
    auto read_bytes = ::read(lead, ptr, size);
    if (read_bytes >= ssize_t(sizeof(uint64_t))) {
      // Actual data bytes are all bytes minus initial padding
      std::size_t data_bytes =
          static_cast<std::size_t>(read_bytes) - sizeof(uint64_t);
      // This should be very cheap since it's in hot cache
      std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
      // Increment our counters
      ptr += data_bytes;
      size -= data_bytes;
    } else {
      int err = errno;
      GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
                            << " " << ::strerror(err) << "\n";
      return 0;
    }
  }
  return (bufsize - size) / sizeof(uint64_t);
}

const bool PerfCounters::kSupported = true;

// Initializes libpfm only on the first call.  Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
  // Function-scope static gets initialized only once on first call.
  static const bool success = []() {
    return pfm_initialize() == PFM_SUCCESS;
  }();
  return success;
}

// Returns true if libpfm can encode `name` as a user-mode perf event.
bool PerfCounters::IsCounterSupported(const std::string& name) {
  Initialize();
  perf_event_attr attr;
  std::memset(&attr, 0, sizeof(attr));
  pfm_perf_encode_arg_t arg;
  std::memset(&arg, 0, sizeof(arg));
  arg.attr = &attr;
  const int mode = PFM_PLM3;  // user mode only
  int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
                                      &arg);
  return (ret == PFM_SUCCESS);
}

// Scans /sys/bus/event_source/devices and returns the numeric "type" of each
// device that is named "cpu" or "cpum_cf", or for which stat() of its "cpus"
// node does not fail with ENOENT. Returns std::nullopt if the directory or
// any selected "type" file cannot be opened, read or parsed.
static std::optional<std::vector<uint64_t>> QueryCPUPMUTypes() {
  std::vector<uint64_t> types;
  DIR* dir = opendir("/sys/bus/event_source/devices");
  if (!dir) {
    return std::nullopt;
  }
  while (dirent* ent = readdir(dir)) {
    std::string_view name_str = ent->d_name;
    auto node_path = [&](const char* node) {
      return std::string("/sys/bus/event_source/devices/") + ent->d_name + "/" +
             node;
    };
    struct stat st;
    if (name_str == "cpu" || name_str == "cpum_cf" ||
        stat(node_path("cpus").c_str(), &st) == 0 || errno != ENOENT) {
      int type_fd = open(node_path("type").c_str(), O_RDONLY);
      if (type_fd < 0) {
        closedir(dir);
        return std::nullopt;
      }
      // Zero-initialized and read with one byte to spare, so the buffer is
      // always NUL-terminated for sscanf.
      char type_str[32] = {};
      ssize_t res = read(type_fd, type_str, sizeof(type_str) - 1);
      close(type_fd);
      if (res < 0) {
        closedir(dir);
        return std::nullopt;
      }
      uint64_t type;
      if (sscanf(type_str, "%" PRIu64, &type) != 1) {
        closedir(dir);
        return std::nullopt;
      }
      types.push_back(type);
    }
  }
  closedir(dir);
  return types;
}

// Returns the PMU types on which the event described by `attr` should be
// opened. Falls back to the single entry {0} for non-generic events or when
// the PMU types cannot be queried.
static std::vector<uint64_t> GetPMUTypesForEvent(const perf_event_attr& attr) {
  // Replicate generic hardware events on all CPU PMUs.
  if (attr.type == PERF_TYPE_HARDWARE && attr.config < PERF_COUNT_HW_MAX) {
    if (auto types = QueryCPUPMUTypes()) {
      return *types;
    }
  }
  return {0};
}

// Opens a perf event for every usable name in `counter_names` (one per PMU
// returned by GetPMUTypesForEvent), grouping them under as few group leaders
// as possible, then enables all groups. Unknown, empty or unopenable names
// are logged and skipped. At most PerfCounterValues::kMaxCounters descriptors
// are created. Returns an empty object if the groups cannot be enabled.
PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    Initialize();
  }

  // Valid counters will populate these arrays but we start empty
  std::vector<std::string> valid_names;
  std::vector<int> counter_ids;
  std::vector<int> leader_ids;

  // Resize to the maximum possible
  valid_names.reserve(counter_names.size());
  counter_ids.reserve(counter_names.size());

  const int kCounterMode = PFM_PLM3;  // user mode only

  // Group leads will be assigned on demand. The idea is that once we cannot
  // create a counter descriptor, the reason is that this group has maxed out
  // so we set the group_id again to -1 and retry - giving the algorithm a
  // chance to create a new group leader to hold the next set of counters.
  int group_id = -1;

  // Loop through all performance counters
  for (size_t i = 0; i < counter_names.size(); ++i) {
    // we are about to push into the valid names vector
    // check if we did not reach the maximum.
    // NOTE: one name may add several descriptors (one per PMU), so the size
    // can move past the limit in steps larger than one; compare with >=.
    if (valid_names.size() >= PerfCounterValues::kMaxCounters) {
      // Log a message if we maxed out and stop adding
      GetErrorLogInstance()
          << counter_names.size() << " counters were requested. The maximum is "
          << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
          << " were already added. All remaining counters will be ignored\n";
      // stop the loop and return what we have already
      break;
    }

    // Check if this name is empty
    const auto& name = counter_names[i];
    if (name.empty()) {
      GetErrorLogInstance()
          << "A performance counter name was the empty string\n";
      continue;
    }

    // Here first means first in group, ie the group leader
    const bool is_first = (group_id < 0);

    // This struct will be populated by libpfm from the counter string
    // and then fed into the syscall perf_event_open
    struct perf_event_attr attr {};
    attr.size = sizeof(attr);

    // This is the input struct to libpfm.
    pfm_perf_encode_arg_t arg{};
    arg.attr = &attr;
    const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
                                                  PFM_OS_PERF_EVENT, &arg);
    if (pfm_get != PFM_SUCCESS) {
      GetErrorLogInstance()
          << "Unknown performance counter name: " << name << "\n";
      continue;
    }

    // We then proceed to populate the remaining fields in our attribute struct
    // Note: the man page for perf_event_create suggests inherit = true and
    // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
    // case.
    attr.disabled = is_first;
    attr.inherit = true;
    attr.pinned = is_first;
    attr.exclude_kernel = true;
    attr.exclude_user = false;
    attr.exclude_hv = true;

    // Read all counters in a group in one read.
    attr.read_format = PERF_FORMAT_GROUP;  //| PERF_FORMAT_TOTAL_TIME_ENABLED |
                                           // PERF_FORMAT_TOTAL_TIME_RUNNING;

    uint64_t base_config = attr.config;
    for (uint64_t pmu : GetPMUTypesForEvent(attr)) {
      // Never create more descriptors than the values buffer can hold; the
      // outer loop logs the condition on its next iteration.
      if (valid_names.size() >= PerfCounterValues::kMaxCounters) {
        break;
      }

      attr.config = (pmu << PERF_PMU_TYPE_SHIFT) | base_config;

      int id = -1;
      while (id < 0) {
        static constexpr size_t kNrOfSyscallRetries = 5;
        // Retry syscall as it was interrupted often (b/64774091).
        for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
             ++num_retries) {
          id = perf_event_open(&attr, 0, -1, group_id, 0);
          if (id >= 0 || errno != EINTR) {
            break;
          }
        }
        if (id < 0) {
          // If the file descriptor is negative we might have reached a limit
          // in the current group. Set the group_id to -1 and retry
          if (group_id >= 0) {
            // Create a new group
            group_id = -1;
          } else {
            // At this point we have already retried to set a new group id and
            // failed. We then give up.
            break;
          }
        }
      }

      // We failed to get a new file descriptor. We might have reached a hard
      // hardware limit that cannot be resolved even with group multiplexing
      if (id < 0) {
        GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
                                 "for performance counter "
                              << name << ". Ignoring\n";

        // We give up on this counter but try to keep going
        // as the others would be fine
        continue;
      }
      if (group_id < 0) {
        // This is a leader, store and assign it to the current file descriptor
        leader_ids.push_back(id);
        group_id = id;
      }
      // This is a valid counter, add it to our descriptor's list
      counter_ids.push_back(id);
      valid_names.push_back(name);
    }
  }

  // Loop through all group leaders activating them
  // There is another option of starting ALL counters in a process but
  // that would be far reaching an intrusion. If the user is using PMCs
  // by themselves then this would have a side effect on them. It is
  // friendlier to loop through all groups individually.
  for (int lead : leader_ids) {
    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
      // This should never happen but if it does, we give up on the
      // entire batch as recovery would be a mess.
      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
                               "Clearing out all counters.\n";

      // Close all performance counters
      for (int id : counter_ids) {
        ::close(id);
      }

      // Return an empty object so our internal state is still good and
      // the process can continue normally without impact
      return NoCounters();
    }
  }

  return PerfCounters(std::move(valid_names), std::move(counter_ids),
                      std::move(leader_ids));
}

// Disables every group leader and closes every counter descriptor owned by
// this object. Does nothing when no counters are held.
void PerfCounters::CloseCounters() const {
  if (counter_ids_.empty()) {
    return;
  }
  for (int lead : leader_ids_) {
    ioctl(lead, PERF_EVENT_IOC_DISABLE);
  }
  for (int fd : counter_ids_) {
    close(fd);
  }
}
#else   // defined HAVE_LIBPFM
// Stub implementations used when the library is built without libpfm.
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }

const bool PerfCounters::kSupported = false;

bool PerfCounters::Initialize() { return false; }

bool PerfCounters::IsCounterSupported(const std::string&) { return false; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    GetErrorLogInstance() << "Performance counters not supported.\n";
  }
  return NoCounters();
}

void PerfCounters::CloseCounters() const {}
#endif  // defined HAVE_LIBPFM

// Sizes both value buffers for the requested names and opens the counters.
PerfCountersMeasurement::PerfCountersMeasurement(
    const std::vector<std::string>& counter_names)
    : start_values_(counter_names.size()), end_values_(counter_names.size()) {
  counters_ = PerfCounters::Create(counter_names);
}

// Move assignment: releases the counters currently held, then takes over the
// descriptors and names of `other`.
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
  if (this != &other) {
    CloseCounters();

    counter_ids_ = std::move(other.counter_ids_);
    leader_ids_ = std::move(other.leader_ids_);
    counter_names_ = std::move(other.counter_names_);
  }
  return *this;
}
}  // namespace internal
}  // namespace benchmark

================================================
FILE: src/perf_counters.h
================================================
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H

#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

#include "benchmark/export.h"
#include "benchmark/macros.h"
#include "benchmark/utils.h"
#include "check.h"
#include "log.h"
#include "mutex.h"

#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif

#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif

namespace benchmark {
namespace internal {

// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based // indexing into the counter values. // The object is used in conjunction with a PerfCounters object, by passing it // to Snapshot(). The Read() method relocates individual reads, discarding // the initial padding from each group leader in the values buffer such that // all user accesses through the [] operator are correct. class BENCHMARK_EXPORT PerfCounterValues { public: explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { BM_CHECK_LE(nr_counters_, kMaxCounters); } // We are reading correctly now so the values don't need to skip padding uint64_t operator[](size_t pos) const { return values_[pos]; } // Increased the maximum to 32 only since the buffer // is std::array<> backed static constexpr size_t kMaxCounters = 32; private: friend class PerfCounters; // Get the byte buffer in which perf counters can be captured. // This is used by PerfCounters::Read std::pair get_data_buffer() { return {reinterpret_cast(values_.data()), sizeof(uint64_t) * (kPadding + nr_counters_)}; } // This reading is complex and as the goal of this class is to // abstract away the intrincacies of the reading process, this is // a better place for it size_t Read(const std::vector& leaders); // Move the padding to 2 due to the reading algorithm (1st padding plus a // current read padding) static constexpr size_t kPadding = 2; std::array values_; const size_t nr_counters_; }; // Collect PMU counters. The object, once constructed, is ready to be used by // calling read(). PMU counter collection is enabled from the time create() is // called, to obtain the object, until the object's destructor is called. class BENCHMARK_EXPORT PerfCounters final { public: // True iff this platform supports performance counters. 
static const bool kSupported; // Returns an empty object static PerfCounters NoCounters() { return PerfCounters(); } ~PerfCounters() { CloseCounters(); } PerfCounters() = default; PerfCounters(PerfCounters&&) = default; PerfCounters(const PerfCounters&) = delete; PerfCounters& operator=(PerfCounters&&) noexcept; PerfCounters& operator=(const PerfCounters&) = delete; // Platform-specific implementations may choose to do some library // initialization here. static bool Initialize(); // Check if the given counter is supported, if the app wants to // check before passing static bool IsCounterSupported(const std::string& name); // Return a PerfCounters object ready to read the counters with the names // specified. The values are user-mode only. The counter name format is // implementation and OS specific. // In case of failure, this method will in the worst case return an // empty object whose state will still be valid. static PerfCounters Create(const std::vector& counter_names); // Take a snapshot of the current value of the counters into the provided // valid PerfCounterValues storage. The values are populated such that: // names()[i]'s value is (*values)[i] BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { #ifndef BENCHMARK_OS_WINDOWS assert(values != nullptr); return values->Read(leader_ids_) == counter_ids_.size(); #else (void)values; return false; #endif } const std::vector& names() const { return counter_names_; } size_t num_counters() const { return counter_names_.size(); } private: PerfCounters(const std::vector& counter_names, std::vector&& counter_ids, std::vector&& leader_ids) : counter_ids_(std::move(counter_ids)), leader_ids_(std::move(leader_ids)), counter_names_(counter_names) {} void CloseCounters() const; std::vector counter_ids_; std::vector leader_ids_; std::vector counter_names_; }; // Typical usage of the above primitives. 
class BENCHMARK_EXPORT PerfCountersMeasurement final { public: PerfCountersMeasurement(const std::vector& counter_names); size_t num_counters() const { return counters_.num_counters(); } const std::vector& names() const { return counters_.names(); } BENCHMARK_ALWAYS_INLINE bool Start() { if (num_counters() == 0) return true; // Tell the compiler to not move instructions above/below where we take // the snapshot. ClobberMemory(); valid_read_ &= counters_.Snapshot(&start_values_); ClobberMemory(); return valid_read_; } BENCHMARK_ALWAYS_INLINE bool Stop( std::vector>& measurements) { if (num_counters() == 0) return true; // Tell the compiler to not move instructions above/below where we take // the snapshot. ClobberMemory(); valid_read_ &= counters_.Snapshot(&end_values_); ClobberMemory(); for (size_t i = 0; i < counters_.names().size(); ++i) { double measurement = static_cast(end_values_[i]) - static_cast(start_values_[i]); measurements.push_back({counters_.names()[i], measurement}); } return valid_read_; } private: PerfCounters counters_; bool valid_read_ = true; PerfCounterValues start_values_; PerfCounterValues end_values_; }; } // namespace internal } // namespace benchmark #if defined(_MSC_VER) #pragma warning(pop) #endif #endif // BENCHMARK_PERF_COUNTERS_H ================================================ FILE: src/re.h ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef BENCHMARK_RE_H_ #define BENCHMARK_RE_H_ #include #include "internal_macros.h" // clang-format off #if !defined(HAVE_STD_REGEX) && \ !defined(HAVE_GNU_POSIX_REGEX) && \ !defined(HAVE_POSIX_REGEX) // No explicit regex selection; detect based on builtin hints. #if defined(BENCHMARK_OS_LINUX) || defined(BENCHMARK_OS_APPLE) #define HAVE_POSIX_REGEX 1 #elif __cplusplus >= 199711L #define HAVE_STD_REGEX 1 #endif #endif // Prefer C regex libraries when compiling w/o exceptions so that we can // correctly report errors. #if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \ defined(HAVE_STD_REGEX) && \ (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX)) #undef HAVE_STD_REGEX #endif #if defined(HAVE_STD_REGEX) #include #elif defined(HAVE_GNU_POSIX_REGEX) #include #elif defined(HAVE_POSIX_REGEX) #include #else #error No regular expression backend was found! #endif // clang-format on #include #include "check.h" namespace benchmark { // A wrapper around the POSIX regular expression API that provides automatic // cleanup class Regex { public: Regex() : init_(false) {} ~Regex(); // Compile a regular expression matcher from spec. Returns true on success. // // On failure (and if error is not nullptr), error is populated with a human // readable error message if an error occurs. bool Init(const std::string& spec, std::string* error); // Returns whether str matches the compiled regular expression. 
bool Match(const std::string& str); private: bool init_; // Underlying regular expression object #if defined(HAVE_STD_REGEX) std::regex re_; #elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX) regex_t re_; #else #error No regular expression backend implementation available #endif }; #if defined(HAVE_STD_REGEX) inline bool Regex::Init(const std::string& spec, std::string* error) { #ifdef BENCHMARK_HAS_NO_EXCEPTIONS ((void)error); // suppress unused warning #else try { #endif re_ = std::regex(spec, std::regex_constants::extended); init_ = true; #ifndef BENCHMARK_HAS_NO_EXCEPTIONS } catch (const std::regex_error& e) { if (error) { *error = e.what(); } } #endif return init_; } inline Regex::~Regex() {} inline bool Regex::Match(const std::string& str) { if (!init_) { return false; } return std::regex_search(str, re_); } #else inline bool Regex::Init(const std::string& spec, std::string* error) { int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB); if (ec != 0) { if (error) { size_t needed = regerror(ec, &re_, nullptr, 0); std::vector errbuf(needed); regerror(ec, &re_, errbuf.data(), needed); // regerror returns the number of bytes necessary to null terminate // the string, so we move that when assigning to error. BM_CHECK_NE(needed, 0); error->assign(errbuf.data(), needed - 1); } return false; } init_ = true; return true; } inline Regex::~Regex() { if (init_) { regfree(&re_); } } inline bool Regex::Match(const std::string& str) { if (!init_) { return false; } return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; } #endif } // end namespace benchmark #endif // BENCHMARK_RE_H_ ================================================ FILE: src/reporter.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "benchmark/reporter.h" #include #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/sysinfo.h" #include "check.h" #include "string_util.h" #include "timers.h" namespace benchmark { BenchmarkReporter::BenchmarkReporter() : output_stream_(&std::cout), error_stream_(&std::cerr) {} BenchmarkReporter::~BenchmarkReporter() {} void BenchmarkReporter::PrintBasicContext(std::ostream* out, Context const& context) { BM_CHECK(out) << "cannot be null"; auto& Out = *out; #ifndef BENCHMARK_OS_QURT // Date/time information is not available on QuRT. // Attempting to get it via this call cause the binary to crash. Out << LocalDateTimeString() << "\n"; #endif if (benchmark::BenchmarkReporter::Context::executable_name != nullptr) { Out << "Running " << benchmark::BenchmarkReporter::Context::executable_name << "\n"; } const CPUInfo& info = context.cpu_info; Out << "Run on (" << info.num_cpus << " X " << (info.cycles_per_second / 1000000.0) << " MHz CPU " << ((info.num_cpus > 1) ? 
"s" : "") << ")\n"; if (!info.caches.empty()) { Out << "CPU Caches:\n"; for (const auto& CInfo : info.caches) { Out << " L" << CInfo.level << " " << CInfo.type << " " << (CInfo.size / 1024) << " KiB"; if (CInfo.num_sharing != 0) { Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")"; } Out << "\n"; } } if (!info.load_avg.empty()) { Out << "Load Average: "; for (auto It = info.load_avg.begin(); It != info.load_avg.end();) { Out << StrFormat("%.2f", *It++); if (It != info.load_avg.end()) { Out << ", "; } } Out << "\n"; } std::map* global_context = internal::GetGlobalContext(); if (global_context != nullptr) { for (const auto& kv : *global_context) { Out << kv.first << ": " << kv.second << "\n"; } } if (CPUInfo::Scaling::ENABLED == info.scaling) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " "real time measurements may be noisy and will incur extra " "overhead.\n"; } const SystemInfo& sysinfo = context.sys_info; if (SystemInfo::ASLR::ENABLED == sysinfo.ASLRStatus) { Out << "***WARNING*** ASLR is enabled, the results may have unreproducible " "noise in them.\n"; } #ifndef NDEBUG Out << "***WARNING*** Library was built as DEBUG. Timings may be " "affected.\n"; #endif } // No initializer because it's already initialized to NULL. 
const char* BenchmarkReporter::Context::executable_name; BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()), sys_info(SystemInfo::Get()) {} std::string BenchmarkReporter::Run::benchmark_name() const { std::string name = run_name.str(); if (run_type == RT_Aggregate) { name += "_" + aggregate_name; } return name; } double BenchmarkReporter::Run::GetAdjustedRealTime() const { double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); if (iterations != 0) { new_time /= static_cast(iterations); } return new_time; } double BenchmarkReporter::Run::GetAdjustedCPUTime() const { double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit); if (iterations != 0) { new_time /= static_cast(iterations); } return new_time; } } // end namespace benchmark ================================================ FILE: src/statistics.cc ================================================ // Copyright 2016 Ismael Jimenez Martinez. All rights reserved. // Copyright 2017 Roman Lebedev. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "statistics.h"

// NOTE(review): the standard header names below were lost in extraction and
// have been restored from usage - confirm against the repository.
#include <algorithm>
#include <cmath>
#include <numeric>
#include <string>
#include <vector>

#include "benchmark/reporter.h"
#include "benchmark/statistics.h"
#include "benchmark/types.h"
#include "check.h"

namespace benchmark {

// Sum of all samples (0.0 for an empty set).
const auto StatisticsSum = [](const std::vector<double>& v) {
  return std::accumulate(v.begin(), v.end(), 0.0);
};

// Arithmetic mean of the samples; 0.0 for an empty set.
double StatisticsMean(const std::vector<double>& v) {
  if (v.empty()) {
    return 0.0;
  }
  return StatisticsSum(v) * (1.0 / static_cast<double>(v.size()));
}

// Median of the samples. For fewer than three samples the mean is returned,
// which coincides with the median for one or two values (and is 0.0 for
// none).
double StatisticsMedian(const std::vector<double>& v) {
  if (v.size() < 3) {
    return StatisticsMean(v);
  }
  std::vector<double> copy(v);

  auto center = copy.begin() + v.size() / 2;
  std::nth_element(copy.begin(), center, copy.end());

  // Did we have an odd number of samples?  If yes, then center is the median.
  // If not, then we are looking for the average between center and the value
  // before.  Instead of resorting, we just look for the max value before it,
  // which is not necessarily the element immediately preceding `center` Since
  // `copy` is only partially sorted by `nth_element`.
  if (v.size() % 2 == 1) {
    return *center;
  }
  auto center2 = std::max_element(copy.begin(), center);
  return (*center + *center2) / 2.0;
}

// Return the sum of the squares of this sample set
const auto SumSquares = [](const std::vector<double>& v) {
  return std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
};

const auto Sqr = [](const double dat) { return dat * dat; };
const auto Sqrt = [](const double dat) {
  // Avoid NaN due to imprecision in the calculations
  if (dat < 0.0) {
    return 0.0;
  }
  return std::sqrt(dat);
};

// Sample standard deviation (n - 1 in the denominator). Returns 0.0 for
// empty and single-element sets.
double StatisticsStdDev(const std::vector<double>& v) {
  const auto mean = StatisticsMean(v);
  if (v.empty()) {
    return mean;
  }

  // Sample standard deviation is undefined for n = 1
  if (v.size() == 1) {
    return 0.0;
  }

  const double avg_squares =
      SumSquares(v) * (1.0 / static_cast<double>(v.size()));
  return Sqrt(static_cast<double>(v.size()) /
              (static_cast<double>(v.size()) - 1.0) *
              (avg_squares - Sqr(mean)));
}

// Coefficient of variation (stddev / mean). Returns 0.0 when fewer than two
// samples are given or when the mean is zero.
double StatisticsCV(const std::vector<double>& v) {
  if (v.size() < 2) {
    return 0.0;
  }

  const auto stddev = StatisticsStdDev(v);
  const auto mean = StatisticsMean(v);

  if (std::fpclassify(mean) == FP_ZERO) {
    return 0.0;
  }

  return stddev / mean;
}

// Builds one aggregate Run per statistic registered on the first report
// (`reports[0].statistics`), computed over the real time, CPU time and user
// counters of all non-skipped repetitions in `reports`. Returns an empty
// vector when fewer than two repetitions were not skipped.
std::vector<BenchmarkReporter::Run> ComputeStats(
    const std::vector<BenchmarkReporter::Run>& reports) {
  typedef BenchmarkReporter::Run Run;
  std::vector<BenchmarkReporter::Run> results;

  auto error_count = std::count_if(reports.begin(), reports.end(),
                                   [](Run const& run) { return run.skipped; });

  if (reports.size() - static_cast<size_t>(error_count) < 2) {
    // We don't report aggregated data if there was a single run.
    return results;
  }

  // Accumulators.
  std::vector<double> real_accumulated_time_stat;
  std::vector<double> cpu_accumulated_time_stat;

  real_accumulated_time_stat.reserve(reports.size());
  cpu_accumulated_time_stat.reserve(reports.size());

  // All repetitions should be run with the same number of iterations so we
  // can take this information from the first benchmark.
  const IterationCount run_iterations = reports.front().iterations;
  // create stats for user counters
  struct CounterStat {
    Counter c;
    std::vector<double> s;
  };
  std::map<std::string, CounterStat> counter_stats;
  for (Run const& r : reports) {
    for (auto const& cnt : r.counters) {
      auto it = counter_stats.find(cnt.first);
      if (it == counter_stats.end()) {
        it = counter_stats
                 .emplace(cnt.first,
                          CounterStat{cnt.second, std::vector<double>{}})
                 .first;
        it->second.s.reserve(reports.size());
      } else {
        // The same counter must carry the same flags in every repetition.
        BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
      }
    }
  }

  // Populate the accumulators.
  for (Run const& run : reports) {
    BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
    BM_CHECK_EQ(run_iterations, run.iterations);
    if (run.skipped != 0u) {
      continue;
    }
    real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
    cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
    // user counters
    for (auto const& cnt : run.counters) {
      auto it = counter_stats.find(cnt.first);
      BM_CHECK_NE(it, counter_stats.end());
      it->second.s.emplace_back(cnt.second);
    }
  }

  // Only add label if it is same for all runs
  std::string report_label = reports[0].report_label;
  for (std::size_t i = 1; i < reports.size(); i++) {
    if (reports[i].report_label != report_label) {
      report_label = "";
      break;
    }
  }

  const double iteration_rescale_factor =
      static_cast<double>(reports.size()) / static_cast<double>(run_iterations);

  for (const auto& Stat : *reports[0].statistics) {
    // Get the data from the accumulator to BenchmarkReporter::Run's.
    Run data;
    data.run_name = reports[0].run_name;
    data.family_index = reports[0].family_index;
    data.per_family_instance_index = reports[0].per_family_instance_index;
    data.run_type = BenchmarkReporter::Run::RT_Aggregate;
    data.threads = reports[0].threads;
    data.repetitions = reports[0].repetitions;
    data.repetition_index = Run::no_repetition_index;
    data.aggregate_name = Stat.name_;
    data.aggregate_unit = Stat.unit_;
    data.report_label = report_label;

    // It is incorrect to say that an aggregate is computed over
    // run's iterations, because those iterations already got averaged.
    // Similarly, if there are N repetitions with 1 iterations each,
    // an aggregate will be computed over N measurements, not 1.
    // Thus it is best to simply use the count of separate reports.
    data.iterations = static_cast<IterationCount>(reports.size());

    data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
    data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);

    if (data.aggregate_unit == StatisticUnit::kTime) {
      // We will divide these times by data.iterations when reporting, but the
      // data.iterations is not necessarily the scale of these measurements,
      // because in each repetition, these timers are sum over all the iters.
      // And if we want to say that the stats are over N repetitions and not
      // M iterations, we need to multiply these by (N/M).
      data.real_accumulated_time *= iteration_rescale_factor;
      data.cpu_accumulated_time *= iteration_rescale_factor;
    }

    data.time_unit = reports[0].time_unit;

    // user counters
    for (auto const& kv : counter_stats) {
      // Do *NOT* rescale the custom counters. They are already properly scaled.
      const auto uc_stat = Stat.compute_(kv.second.s);
      // `kv` already refers to the map entry, so read flags/oneK from it
      // directly instead of looking the key up again.
      auto c = Counter(uc_stat, kv.second.c.flags, kv.second.c.oneK);
      data.counters[kv.first] = c;
    }

    results.push_back(data);
  }

  return results;
}

}  // end namespace benchmark

================================================
FILE: src/statistics.h
================================================
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
// Copyright 2017 Roman Lebedev. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef STATISTICS_H_
#define STATISTICS_H_

#include <vector>

#include "benchmark/export.h"
#include "benchmark/reporter.h"

namespace benchmark {

// Return a vector containing the mean, median and standard deviation
// information (and any user-specified info) for the specified list of reports.
// If 'reports' contains less than two non-errored runs an empty vector is // returned BENCHMARK_EXPORT std::vector ComputeStats( const std::vector& reports); BENCHMARK_EXPORT double StatisticsMean(const std::vector& v); BENCHMARK_EXPORT double StatisticsMedian(const std::vector& v); BENCHMARK_EXPORT double StatisticsStdDev(const std::vector& v); BENCHMARK_EXPORT double StatisticsCV(const std::vector& v); } // end namespace benchmark #endif // STATISTICS_H_ ================================================ FILE: src/string_util.cc ================================================ #include "string_util.h" #include #ifdef BENCHMARK_STL_ANDROID_GNUSTL #include #endif #include #include #include #include #include #include "arraysize.h" #include "benchmark/types.h" namespace benchmark { namespace { // kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"}; // Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"}; // milli, micro, nano, pico, femto, atto, zepto, yocto. const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"}; // We require that all three arrays have the same size. static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), "SI and IEC unit arrays must be the same size"); static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), "Small SI and Big SI unit arrays must be the same size"); const int64_t kUnitsSize = arraysize(kBigSIUnits); std::pair ToExponentAndMantissa(double val, int precision, double one_k) { std::string mantissa; int64_t exponent = 0; if (val < 0) { mantissa = "-"; val = -val; } else { mantissa.clear(); } // Adjust threshold so that it never excludes things which can't be rendered // in 'precision' digits. 
const double adjusted_threshold = std::max(1.0, 1.0 / std::pow(10.0, precision)); const double big_threshold = (adjusted_threshold * one_k) - 1; const double small_threshold = adjusted_threshold; // Values in ]simple_threshold,small_threshold[ will be printed as-is const double simple_threshold = 0.01; auto format_mantissa = [&](double v) { mantissa += StrFormat("%g", v); }; // Positive powers if (val > big_threshold) { double scaled = val; for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) { scaled /= one_k; if (scaled <= big_threshold) { format_mantissa(scaled); exponent = static_cast(i + 1); return std::make_pair(mantissa, exponent); } } format_mantissa(val); exponent = 0; return std::make_pair(mantissa, exponent); } // Negative powers if (val < small_threshold) { if (val < simple_threshold) { double scaled = val; for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) { scaled *= one_k; if (scaled >= small_threshold) { format_mantissa(scaled); exponent = -static_cast(i + 1); return std::make_pair(mantissa, exponent); } } } format_mantissa(val); exponent = 0; return std::make_pair(mantissa, exponent); } format_mantissa(val); exponent = 0; return std::make_pair(mantissa, exponent); } std::string ExponentToPrefix(int64_t exponent, bool iec) { if (exponent == 0) { return {}; } const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); if (index >= kUnitsSize) { return {}; } const char* const* array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); return std::string(array[index]); } std::string ToBinaryStringFullySpecified(double value, int precision, Counter::OneK one_k) { auto [mantissa, exponent] = ToExponentAndMantissa( value, precision, one_k == Counter::kIs1024 ? 
1024.0 : 1000.0); return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024); } PRINTF_FORMAT_STRING_FUNC(1, 0) std::string StrFormatImp(const char* msg, va_list args) { // we might need a second shot at this, so pre-emptivly make a copy va_list args_cp; va_copy(args_cp, args); // Use std::array for first attempt to avoid one memory allocation guess what // the size might be std::array local_buff = {}; // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp); va_end(args_cp); // handle empty expansion if (ret == 0) { return {}; } if (static_cast(ret) < local_buff.size()) { return std::string(local_buff.data()); } // we did not provide a long enough buffer on our first attempt. // add 1 to size to account for null-byte in size cast to prevent overflow std::size_t size = static_cast(ret) + 1; auto buff_ptr = std::unique_ptr(new char[size]); // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk va_list args_cp2; va_copy(args_cp2, args); vsnprintf(buff_ptr.get(), size, msg, args_cp2); va_end(args_cp2); return std::string(buff_ptr.get()); } } // end namespace std::string HumanReadableNumber(double n, Counter::OneK one_k) { return ToBinaryStringFullySpecified(n, 1, one_k); } std::string StrFormat(const char* format, ...) 
{ va_list args; va_start(args, format); std::string tmp = StrFormatImp(format, args); va_end(args); return tmp; } std::vector StrSplit(const std::string& str, char delim) { if (str.empty()) { return {}; } std::vector ret; size_t first = 0; size_t next = str.find(delim); for (; next != std::string::npos; first = next + 1, next = str.find(delim, first)) { ret.push_back(str.substr(first, next - first)); } ret.push_back(str.substr(first)); return ret; } #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including * stoul, stoi, stod. We reimplement them here using C functions strtoul, * strtol, strtod. Note that reimplemented functions are in benchmark:: * namespace, not std:: namespace. */ unsigned long stoul(const std::string& str, size_t* pos, int base) { /* Record previous errno */ const int oldErrno = errno; errno = 0; const char* strStart = str.c_str(); char* strEnd = const_cast(strStart); const unsigned long result = strtoul(strStart, &strEnd, base); const int strtoulErrno = errno; /* Restore previous errno */ errno = oldErrno; /* Check for errors and return */ if (strtoulErrno == ERANGE) { throw std::out_of_range("stoul failed: " + str + " is outside of range of unsigned long"); } else if (strEnd == strStart || strtoulErrno != 0) { throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); } return result; } int stoi(const std::string& str, size_t* pos, int base) { /* Record previous errno */ const int oldErrno = errno; errno = 0; const char* strStart = str.c_str(); char* strEnd = const_cast(strStart); const long result = strtol(strStart, &strEnd, base); const int strtolErrno = errno; /* Restore previous errno */ errno = oldErrno; /* Check for errors and return */ if (strtolErrno == ERANGE || long(int(result)) != result) { throw std::out_of_range("stoul failed: " + str + " is outside of range of int"); } else if (strEnd == 
strStart || strtolErrno != 0) { throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); } return int(result); } double stod(const std::string& str, size_t* pos) { /* Record previous errno */ const int oldErrno = errno; errno = 0; const char* strStart = str.c_str(); char* strEnd = const_cast(strStart); const double result = strtod(strStart, &strEnd); /* Restore previous errno */ const int strtodErrno = errno; errno = oldErrno; /* Check for errors and return */ if (strtodErrno == ERANGE) { throw std::out_of_range("stoul failed: " + str + " is outside of range of int"); } else if (strEnd == strStart || strtodErrno != 0) { throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); } return result; } #endif } // end namespace benchmark ================================================ FILE: src/string_util.h ================================================ #ifndef BENCHMARK_STRING_UTIL_H_ #define BENCHMARK_STRING_UTIL_H_ #include #include #include #include #include "benchmark/counter.h" #include "benchmark/export.h" #include "check.h" namespace benchmark { BENCHMARK_EXPORT std::string HumanReadableNumber(double n, Counter::OneK one_k); BENCHMARK_EXPORT std::string StrFormat(const char* format, ...) PRINTF_FORMAT_STRING_FUNC(1, 2); inline std::ostream& StrCatImp(std::ostream& out) BENCHMARK_NOEXCEPT { return out; } template inline std::ostream& StrCatImp(std::ostream& out, First&& f, Rest&&... rest) { out << std::forward(f); return StrCatImp(out, std::forward(rest)...); } template inline std::string StrCat(Args&&... args) { std::ostringstream ss; StrCatImp(ss, std::forward(args)...); return ss.str(); } BENCHMARK_EXPORT std::vector StrSplit(const std::string& str, char delim); // Disable lint checking for this block since it re-implements C functions. 
// NOLINTBEGIN #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including * stoul, stoi, stod. We reimplement them here using C functions strtoul, * strtol, strtod. Note that reimplemented functions are in benchmark:: * namespace, not std:: namespace. */ unsigned long stoul(const std::string& str, size_t* pos = nullptr, int base = 10); int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); double stod(const std::string& str, size_t* pos = nullptr); #else using std::stod; // NOLINT(misc-unused-using-decls) using std::stoi; // NOLINT(misc-unused-using-decls) using std::stoul; // NOLINT(misc-unused-using-decls) #endif // NOLINTEND } // end namespace benchmark #endif // BENCHMARK_STRING_UTIL_H_ ================================================ FILE: src/sysinfo.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS #if !defined(WINVER) || WINVER < 0x0600 #undef WINVER #define WINVER 0x0600 #endif // WINVER handling #include #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA #include #include #include #else #include #if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) && \ !defined(BENCHMARK_OS_WASI) #include #endif #include #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \ defined BENCHMARK_OS_DRAGONFLY #define BENCHMARK_HAS_SYSCTL #include #endif #endif #if defined(BENCHMARK_OS_SOLARIS) #include #include #endif #if defined(BENCHMARK_OS_QNX) #include #endif #if defined(BENCHMARK_OS_QURT) #include #endif #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) #if defined(BENCHMARK_OS_FREEBSD) #include #endif #include #endif #if defined(BENCHMARK_OS_LINUX) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "benchmark/export.h" #include "benchmark/sysinfo.h" #include "benchmark/utils.h" #include "check.h" #include "cycleclock.h" #include "log.h" #include "string_util.h" #include "timers.h" namespace benchmark { namespace { void PrintImp(std::ostream& out) { out << '\n'; } template void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { out << std::forward(f); PrintImp(out, std::forward(rest)...); } template BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { PrintImp(std::cerr, std::forward(args)...); std::cerr << std::flush; std::exit(EXIT_FAILURE); } #ifdef BENCHMARK_HAS_SYSCTL /// ValueUnion - A type used to correctly alias the byte-for-byte output of /// `sysctl` with the result type it's to be interpreted as. 
struct ValueUnion { union DataT { int32_t int32_value; int64_t int64_value; // For correct aliasing of union members from bytes. char bytes[8]; }; using DataPtr = std::unique_ptr; // The size of the data union member + its trailing array size. std::size_t size; DataPtr buff; public: ValueUnion() : size(0), buff(nullptr, &std::free) {} explicit ValueUnion(std::size_t buff_size) : size(sizeof(DataT) + buff_size), buff(::new(std::malloc(size)) DataT(), &std::free) {} ValueUnion(ValueUnion&& other) = default; explicit operator bool() const { return bool(buff); } char* data() const { return buff->bytes; } std::string GetAsString() const { return std::string(data()); } int64_t GetAsInteger() const { if (size == sizeof(buff->int32_value)) return buff->int32_value; else if (size == sizeof(buff->int64_value)) return buff->int64_value; BENCHMARK_UNREACHABLE(); } template std::array GetAsArray() { const int arr_size = sizeof(T) * N; BM_CHECK_LE(arr_size, size); std::array arr; std::memcpy(arr.data(), data(), arr_size); return arr; } }; ValueUnion GetSysctlImp(std::string const& name) { #if defined BENCHMARK_OS_OPENBSD int mib[2]; mib[0] = CTL_HW; if ((name == "hw.ncpuonline") || (name == "hw.cpuspeed")) { ValueUnion buff(sizeof(int)); if (name == "hw.ncpuonline") { mib[1] = HW_NCPUONLINE; } else { mib[1] = HW_CPUSPEED; } if (sysctl(mib, 2, buff.data(), &buff.size, nullptr, 0) == -1) { return ValueUnion(); } return buff; } return ValueUnion(); #else std::size_t cur_buff_size = 0; if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1) return ValueUnion(); ValueUnion buff(cur_buff_size); if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0) return buff; return ValueUnion(); #endif } BENCHMARK_MAYBE_UNUSED bool GetSysctl(std::string const& name, std::string* out) { out->clear(); auto buff = GetSysctlImp(name); if (!buff) return false; out->assign(buff.data()); return true; } template ::value>::type> bool GetSysctl(std::string const& name, 
Tp* out) { *out = 0; auto buff = GetSysctlImp(name); if (!buff) return false; *out = static_cast(buff.GetAsInteger()); return true; } template bool GetSysctl(std::string const& name, std::array* out) { auto buff = GetSysctlImp(name); if (!buff) return false; *out = buff.GetAsArray(); return true; } #endif template bool ReadFromFile(std::string const& fname, ArgT* arg) { *arg = ArgT(); std::ifstream f(fname.c_str()); if (!f.is_open()) { return false; } f >> *arg; return f.good(); } CPUInfo::Scaling CpuScaling(int num_cpus) { // We don't have a valid CPU count, so don't even bother. if (num_cpus <= 0) { return CPUInfo::Scaling::UNKNOWN; } #if defined(BENCHMARK_OS_QNX) return CPUInfo::Scaling::UNKNOWN; #elif !defined(BENCHMARK_OS_WINDOWS) // On Linux, the CPUfreq subsystem exposes CPU information as files on the // local file system. If reading the exported files fails, then we may not be // running on Linux, so we silently ignore all the read errors. std::string res; for (int cpu = 0; cpu < num_cpus; ++cpu) { std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); if (ReadFromFile(governor_file, &res) && res != "performance") { return CPUInfo::Scaling::ENABLED; } } return CPUInfo::Scaling::DISABLED; #else return CPUInfo::Scaling::UNKNOWN; #endif } int CountSetBitsInCPUMap(std::string val) { auto CountBits = [](std::string part) { using CPUMask = std::bitset; part = "0x" + part; CPUMask mask(benchmark::stoul(part, nullptr, 16)); return static_cast(mask.count()); }; std::size_t pos = 0; int total = 0; while ((pos = val.find(',')) != std::string::npos) { total += CountBits(val.substr(0, pos)); val = val.substr(pos + 1); } if (!val.empty()) { total += CountBits(val); } return total; } BENCHMARK_MAYBE_UNUSED std::vector GetCacheSizesFromKVFS() { std::vector res; std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; int idx = 0; while (true) { CPUInfo::CacheInfo info; std::string fpath = StrCat(dir, "index", idx++, "/"); 
std::ifstream f(StrCat(fpath, "size").c_str()); if (!f.is_open()) { break; } std::string suffix; f >> info.size; if (f.fail()) { PrintErrorAndDie("Failed while reading file '", fpath, "size'"); } if (f.good()) { f >> suffix; if (f.bad()) { PrintErrorAndDie( "Invalid cache size format: failed to read size suffix"); } else if (f && suffix != "K") { PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); } else if (suffix == "K") { info.size *= 1024; } } if (!ReadFromFile(StrCat(fpath, "type"), &info.type)) { PrintErrorAndDie("Failed to read from file ", fpath, "type"); } if (!ReadFromFile(StrCat(fpath, "level"), &info.level)) { PrintErrorAndDie("Failed to read from file ", fpath, "level"); } std::string map_str; if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str)) { PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map"); } info.num_sharing = CountSetBitsInCPUMap(map_str); res.push_back(info); } return res; } #ifdef BENCHMARK_OS_MACOSX std::vector GetCacheSizesMacOSX() { std::vector res; std::array cache_counts{{0, 0, 0, 0}}; GetSysctl("hw.cacheconfig", &cache_counts); struct { std::string name; std::string type; int level; int num_sharing; } cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]}, {"hw.l1icachesize", "Instruction", 1, cache_counts[1]}, {"hw.l2cachesize", "Unified", 2, cache_counts[2]}, {"hw.l3cachesize", "Unified", 3, cache_counts[3]}}; for (auto& c : cases) { int val; if (!GetSysctl(c.name, &val)) continue; CPUInfo::CacheInfo info; info.type = c.type; info.level = c.level; info.size = val; info.num_sharing = c.num_sharing; res.push_back(std::move(info)); } return res; } #elif defined(BENCHMARK_OS_WINDOWS) std::vector GetCacheSizesWindows() { std::vector res; DWORD buffer_size = 0; using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; using CInfo = CACHE_DESCRIPTOR; using UPtr = std::unique_ptr; GetLogicalProcessorInformation(nullptr, &buffer_size); UPtr buff(static_cast(std::malloc(buffer_size)), 
&std::free); if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) { PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", GetLastError()); } PInfo* it = buff.get(); PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); for (; it != end; ++it) { if (it->Relationship != RelationCache) { continue; } using BitSet = std::bitset; BitSet b(it->ProcessorMask); // To prevent duplicates, only consider caches where CPU 0 is specified if (!b.test(0)) continue; const CInfo& cache = it->Cache; CPUInfo::CacheInfo C; C.num_sharing = static_cast(b.count()); C.level = cache.Level; C.size = static_cast(cache.Size); C.type = "Unknown"; switch (cache.Type) { // Windows SDK version >= 10.0.26100.0 #ifdef NTDDI_WIN11_GE case CacheUnknown: break; #endif case CacheUnified: C.type = "Unified"; break; case CacheInstruction: C.type = "Instruction"; break; case CacheData: C.type = "Data"; break; case CacheTrace: C.type = "Trace"; break; } res.push_back(C); } return res; } #elif BENCHMARK_OS_QNX std::vector GetCacheSizesQNX() { std::vector res; struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr); uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize; for (int i = 0; i < num; ++i) { CPUInfo::CacheInfo info; switch (cache->flags) { case CACHE_FLAG_INSTR: info.type = "Instruction"; info.level = 1; break; case CACHE_FLAG_DATA: info.type = "Data"; info.level = 1; break; case CACHE_FLAG_UNIFIED: info.type = "Unified"; info.level = 2; break; case CACHE_FLAG_SHARED: info.type = "Shared"; info.level = 3; break; default: continue; break; } info.size = cache->line_size * cache->num_lines; info.num_sharing = 0; res.push_back(std::move(info)); cache = SYSPAGE_ARRAY_ADJ_OFFSET(cacheattr, cache, elsize); } return res; } #endif std::vector GetCacheSizes() { #ifdef BENCHMARK_OS_MACOSX return GetCacheSizesMacOSX(); #elif defined(BENCHMARK_OS_WINDOWS) return GetCacheSizesWindows(); #elif defined(BENCHMARK_OS_QNX) return 
GetCacheSizesQNX(); #elif defined(BENCHMARK_OS_QURT) || defined(BENCHMARK_OS_EMSCRIPTEN) || \ defined(BENCHMARK_OS_WASI) return std::vector(); #else return GetCacheSizesFromKVFS(); #endif } std::string GetSystemName() { #if defined(BENCHMARK_OS_WINDOWS) std::string str; static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1; TCHAR hostname[COUNT] = {'\0'}; DWORD DWCOUNT = COUNT; if (!GetComputerName(hostname, &DWCOUNT)) return std::string(""); #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else // `WideCharToMultiByte` returns `0` when conversion fails. int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, NULL, 0, NULL, NULL); str.resize(len); WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0], static_cast(str.size()), NULL, NULL); #endif return str; #elif defined(BENCHMARK_OS_QURT) std::string str = "Hexagon DSP"; qurt_arch_version_t arch_version_struct; if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) { str += " v"; str += std::to_string(arch_version_struct.arch_version); } return str; #elif defined(BENCHMARK_OS_WASI) return std::string("wasi"); #else #ifndef HOST_NAME_MAX #ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_NACL) #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_QNX) #define HOST_NAME_MAX 154 #elif defined(BENCHMARK_OS_RTEMS) #define HOST_NAME_MAX 256 #elif defined(BENCHMARK_OS_SOLARIS) #define HOST_NAME_MAX MAXHOSTNAMELEN #elif defined(BENCHMARK_OS_ZOS) #define HOST_NAME_MAX _POSIX_HOST_NAME_MAX #else #pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 #endif #endif // def HOST_NAME_MAX char hostname[HOST_NAME_MAX]; int retVal = gethostname(hostname, HOST_NAME_MAX); return retVal != 0 ? std::string() : std::string(hostname); #endif // Catch-all POSIX block. 
} SystemInfo::ASLR GetASLR() { #ifdef BENCHMARK_OS_LINUX const auto curr_personality = personality(0xffffffff); return (curr_personality & ADDR_NO_RANDOMIZE) ? SystemInfo::ASLR::DISABLED : SystemInfo::ASLR::ENABLED; #else // FIXME: support detecting ASLR on other OS. return SystemInfo::ASLR::UNKNOWN; #endif } int GetNumCPUsImpl() { #ifdef BENCHMARK_OS_WINDOWS SYSTEM_INFO sysinfo; // Use memset as opposed to = {} to avoid GCC missing initializer false // positives. std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); GetSystemInfo(&sysinfo); // number of logical processors in the current group return static_cast(sysinfo.dwNumberOfProcessors); #elif defined(BENCHMARK_OS_QNX) return static_cast(_syspage_ptr->num_cpu); #elif defined(BENCHMARK_OS_QURT) qurt_sysenv_max_hthreads_t hardware_threads; if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) { hardware_threads.max_hthreads = 1; } return static_cast(hardware_threads.max_hthreads); #elif defined(BENCHMARK_HAS_SYSCTL) // *BSD, macOS int num_cpu = -1; constexpr auto* hwncpu = #if defined BENCHMARK_OS_MACOSX "hw.logicalcpu"; #elif defined(HW_NCPUONLINE) "hw.ncpuonline"; #else "hw.ncpu"; #endif if (GetSysctl(hwncpu, &num_cpu)) return num_cpu; PrintErrorAndDie("Err: ", strerror(errno)); #elif defined(_SC_NPROCESSORS_ONLN) // Linux, Solaris, AIX, Haiku, WASM, etc. // Returns -1 in case of a failure. int num_cpu = static_cast(sysconf(_SC_NPROCESSORS_ONLN)); if (num_cpu < 0) { PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: ", strerror(errno)); } return num_cpu; #else // Fallback, no other API exists. return -1; #endif BENCHMARK_UNREACHABLE(); } int GetNumCPUs() { int num_cpus = GetNumCPUsImpl(); if (num_cpus < 1) { std::cerr << "Unable to extract number of CPUs.\n"; // There must be at least one CPU on which we're running. 
num_cpus = 1; } return num_cpus; } class ThreadAffinityGuard final { public: ThreadAffinityGuard() : reset_affinity(SetAffinity()) { if (!reset_affinity) { std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU " "frequency may be incorrect.\n"; } } ~ThreadAffinityGuard() { if (!reset_affinity) { return; } #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) int ret = pthread_setaffinity_np(self, sizeof(previous_affinity), &previous_affinity); if (ret == 0) { return; } #elif defined(BENCHMARK_OS_WINDOWS_WIN32) DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity); if (ret != 0) { return; } #endif // def BENCHMARK_HAS_PTHREAD_AFFINITY PrintErrorAndDie("Failed to reset thread affinity"); } ThreadAffinityGuard(ThreadAffinityGuard&&) = delete; ThreadAffinityGuard(const ThreadAffinityGuard&) = delete; ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete; ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete; private: bool SetAffinity() { #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) int ret = 0; self = pthread_self(); ret = pthread_getaffinity_np(self, sizeof(previous_affinity), &previous_affinity); if (ret != 0) { return false; } cpu_set_t affinity; memcpy(&affinity, &previous_affinity, sizeof(affinity)); bool is_first_cpu = true; for (int i = 0; i < CPU_SETSIZE; ++i) { if (CPU_ISSET(i, &affinity)) { if (is_first_cpu) { is_first_cpu = false; } else { CPU_CLR(i, &affinity); } } } if (is_first_cpu) { return false; } ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity); return ret == 0; #elif defined(BENCHMARK_OS_WINDOWS_WIN32) self = GetCurrentThread(); DWORD_PTR mask = static_cast(1) << GetCurrentProcessorNumber(); previous_affinity = SetThreadAffinityMask(self, mask); return previous_affinity != 0; #else return false; #endif // def BENCHMARK_HAS_PTHREAD_AFFINITY } #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) pthread_t self{}; cpu_set_t previous_affinity{}; #elif defined(BENCHMARK_OS_WINDOWS_WIN32) HANDLE self; DWORD_PTR 
previous_affinity; #endif // def BENCHMARK_HAS_PTHREAD_AFFINITY bool reset_affinity; }; double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { // Currently, scaling is only used on linux path here, // suppress diagnostics about it being unused on other paths. (void)scaling; #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN long freq = 0; // If the kernel is exporting the tsc frequency use that. There are issues // where cpuinfo_max_freq cannot be relied on because the BIOS may be // exporintg an invalid p-state (on x86) or p-states may be used to put the // processor in a new mode (turbo mode). Essentially, those frequencies // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // well. if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) // If CPU scaling is disabled, use the *current* frequency. // Note that we specifically don't want to read cpuinfo_cur_freq, // because it is only readable by root. || (scaling == CPUInfo::Scaling::DISABLED && ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", &freq)) // Otherwise, if CPU scaling may be in effect, we want to use // the *maximum* frequency, not whatever CPU speed some random processor // happens to be using now. || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", &freq)) { // The value is in kHz (as the file name suggests). For example, on a // 2GHz warpstation, the file contains the value "2000000". 
return static_cast(freq) * 1000.0; } const double error_value = -1; double bogo_clock = error_value; std::ifstream f("/proc/cpuinfo"); if (!f.is_open()) { std::cerr << "failed to open /proc/cpuinfo\n"; return error_value; } auto StartsWithKey = [](std::string const& Value, std::string const& Key) { if (Key.size() > Value.size()) { return false; } auto Cmp = [&](char X, char Y) { return std::tolower(X) == std::tolower(Y); }; return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); }; std::string ln; while (std::getline(f, ln)) { if (ln.empty()) { continue; } std::size_t split_idx = ln.find(':'); std::string value; if (split_idx != std::string::npos) { value = ln.substr(split_idx + 1); } // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept positive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. if (StartsWithKey(ln, "cpu MHz")) { if (!value.empty()) { double cycles_per_second = benchmark::stod(value) * 1000000.0; if (cycles_per_second > 0) { return cycles_per_second; } } } else if (StartsWithKey(ln, "bogomips")) { if (!value.empty()) { bogo_clock = benchmark::stod(value) * 1000000.0; if (bogo_clock < 0.0) { bogo_clock = error_value; } } } } if (f.bad()) { std::cerr << "Failure reading /proc/cpuinfo\n"; return error_value; } if (!f.eof()) { std::cerr << "Failed to read to end of /proc/cpuinfo\n"; return error_value; } f.close(); // If we found the bogomips clock, but nothing better, we'll use it (but // we're not happy about it); otherwise, fallback to the rough estimation // below. 
if (bogo_clock >= 0.0) { return bogo_clock; } #elif defined BENCHMARK_HAS_SYSCTL constexpr auto* freqStr = #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) "machdep.tsc_freq"; #elif defined BENCHMARK_OS_OPENBSD "hw.cpuspeed"; #elif defined BENCHMARK_OS_DRAGONFLY "hw.tsc_frequency"; #else "hw.cpufrequency"; #endif unsigned long long hz = 0; #if defined BENCHMARK_OS_OPENBSD if (GetSysctl(freqStr, &hz)) { return static_cast(hz * 1000000); } #else if (GetSysctl(freqStr, &hz)) { return static_cast(hz); } #endif fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", freqStr, strerror(errno)); fprintf(stderr, "This does not affect benchmark measurements, only the " "metadata output.\n"); #elif defined BENCHMARK_OS_WINDOWS_WIN32 // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. DWORD data, data_size = sizeof(data); if (IsWindowsXPOrGreater() && SUCCEEDED( SHGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", nullptr, &data, &data_size))) { return static_cast(static_cast(data) * static_cast(1000 * 1000)); // was mhz } #elif defined(BENCHMARK_OS_SOLARIS) kstat_ctl_t* kc = kstat_open(); if (!kc) { std::cerr << "failed to open /dev/kstat\n"; return -1; } kstat_t* ksp = kstat_lookup(kc, const_cast("cpu_info"), -1, const_cast("cpu_info0")); if (!ksp) { std::cerr << "failed to lookup in /dev/kstat\n"; return -1; } if (kstat_read(kc, ksp, NULL) < 0) { std::cerr << "failed to read from /dev/kstat\n"; return -1; } kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup( ksp, const_cast("current_clock_Hz")); if (!knp) { std::cerr << "failed to lookup data in /dev/kstat\n"; return -1; } if (knp->data_type != KSTAT_DATA_UINT64) { std::cerr << "current_clock_Hz is of unexpected data type: " << knp->data_type << "\n"; return -1; } double clock_hz = knp->value.ui64; kstat_close(kc); return clock_hz; #elif defined(BENCHMARK_OS_QNX) return static_cast( 
static_cast(SYSPAGE_ENTRY(cpuinfo)->speed) * static_cast(1000 * 1000)); #elif defined(BENCHMARK_OS_QURT) // QuRT doesn't provide any API to query Hexagon frequency. return 1000000000; #endif // If we've fallen through, attempt to roughly estimate the CPU clock rate. // Make sure to use the same cycle counter when starting and stopping the // cycle timer. We just pin the current thread to a cpu in the previous // affinity set. ThreadAffinityGuard affinity_guard; static constexpr double estimate_time_s = 1.0; const double start_time = ChronoClockNow(); const auto start_ticks = cycleclock::Now(); // Impose load instead of calling sleep() to make sure the cycle counter // works. using PRNG = std::minstd_rand; using Result = PRNG::result_type; PRNG rng(static_cast(start_ticks)); Result state = 0; do { static constexpr size_t batch_size = 10000; rng.discard(batch_size); state += rng(); } while (ChronoClockNow() - start_time < estimate_time_s); DoNotOptimize(state); const auto end_ticks = cycleclock::Now(); const double end_time = ChronoClockNow(); return static_cast(end_ticks - start_ticks) / (end_time - start_time); // Reset the affinity of current thread when the lifetime of affinity_guard // ends. 
} std::vector GetLoadAvg() { #if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ !(defined(__ANDROID__) && __ANDROID_API__ < 29) static constexpr int kMaxSamples = 3; std::vector res(kMaxSamples, 0.0); const auto nelem = getloadavg(res.data(), kMaxSamples); if (nelem < 1) { res.clear(); } else { res.resize(static_cast(nelem)); } return res; #else return {}; #endif } } // end namespace const CPUInfo& CPUInfo::Get() { static const CPUInfo* info = new CPUInfo(); return *info; } CPUInfo::CPUInfo() : num_cpus(GetNumCPUs()), scaling(CpuScaling(num_cpus)), cycles_per_second(GetCPUCyclesPerSecond(scaling)), caches(GetCacheSizes()), load_avg(GetLoadAvg()) {} const SystemInfo& SystemInfo::Get() { static const SystemInfo* info = new SystemInfo(); return *info; } SystemInfo::SystemInfo() : name(GetSystemName()), ASLRStatus(GetASLR()) {} } // end namespace benchmark ================================================ FILE: src/thread_manager.h ================================================ #ifndef BENCHMARK_THREAD_MANAGER_H #define BENCHMARK_THREAD_MANAGER_H #include #include "benchmark/counter.h" #include "benchmark/statistics.h" #include "benchmark/types.h" #include "mutex.h" namespace benchmark { namespace internal { class ThreadManager { public: explicit ThreadManager(int num_threads) : start_stop_barrier_(num_threads) {} Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) { return benchmark_mutex_; } bool StartStopBarrier() { return start_stop_barrier_.wait(); } void NotifyThreadComplete() { start_stop_barrier_.removeThread(); } struct Result { IterationCount iterations = 0; double real_time_used = 0; double cpu_time_used = 0; double manual_time_used = 0; int64_t complexity_n = 0; std::string report_label_; std::string skip_message_; internal::Skipped skipped_ = internal::NotSkipped; UserCounters counters; }; 
GUARDED_BY(GetBenchmarkMutex()) Result results; private: mutable Mutex benchmark_mutex_; Barrier start_stop_barrier_; }; } // namespace internal } // namespace benchmark #endif // BENCHMARK_THREAD_MANAGER_H ================================================ FILE: src/thread_timer.h ================================================ #ifndef BENCHMARK_THREAD_TIMER_H #define BENCHMARK_THREAD_TIMER_H #include "check.h" #include "timers.h" namespace benchmark { namespace internal { class ThreadTimer { explicit ThreadTimer(bool measure_process_cpu_time_) : measure_process_cpu_time(measure_process_cpu_time_) {} public: static ThreadTimer Create() { return ThreadTimer(/*measure_process_cpu_time_=*/false); } static ThreadTimer CreateProcessCpuTime() { return ThreadTimer(/*measure_process_cpu_time_=*/true); } // Called by each thread void StartTimer() { running_ = true; start_real_time_ = ChronoClockNow(); start_cpu_time_ = ReadCpuTimerOfChoice(); } // Called by each thread void StopTimer() { BM_CHECK(running_); running_ = false; real_time_used_ += ChronoClockNow() - start_real_time_; // Floating point error can result in the subtraction producing a negative // time. Guard against that. cpu_time_used_ += std::max(ReadCpuTimerOfChoice() - start_cpu_time_, 0); } // Called by each thread void SetIterationTime(double seconds) { manual_time_used_ += seconds; } bool running() const { return running_; } // REQUIRES: timer is not running double real_time_used() const { BM_CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running double cpu_time_used() const { BM_CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running double manual_time_used() const { BM_CHECK(!running_); return manual_time_used_; } private: double ReadCpuTimerOfChoice() const { if (measure_process_cpu_time) return ProcessCPUUsage(); return ThreadCPUUsage(); } // should the thread, or the process, time be measured? 
const bool measure_process_cpu_time; bool running_ = false; // Is the timer running double start_real_time_ = 0; // If running_ double start_cpu_time_ = 0; // If running_ // Accumulated time so far (does not contain current slice if running_) double real_time_used_ = 0; double cpu_time_used_ = 0; // Manually set iteration time. User sets this with SetIterationTime(seconds). double manual_time_used_ = 0; }; } // namespace internal } // namespace benchmark #endif // BENCHMARK_THREAD_TIMER_H ================================================ FILE: src/timers.cc ================================================ // Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "timers.h"

#include "internal_macros.h"

#ifdef BENCHMARK_OS_WINDOWS
#include <shlwapi.h>
#undef StrCat  // Don't let StrCat in string_util.h be renamed to lstrcatA
#include <versionhelpers.h>
#include <windows.h>
#else
#include <fcntl.h>
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) && \
    !defined(BENCHMARK_OS_WASI)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <sys/types.h>  // this header must be included before 'sys/sysctl.h' to
                        // avoid compilation error on FreeBSD
#include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_DRAGONFLY || \
    defined BENCHMARK_OS_MACOSX
#include <sys/sysctl.h>
#endif
#if defined(BENCHMARK_OS_MACOSX)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
#if defined(BENCHMARK_OS_QURT)
#include <qurt.h>
#endif
#endif

#ifdef BENCHMARK_OS_EMSCRIPTEN
#include <emscripten.h>
#endif

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <limits>
#include <mutex>

#include "check.h"
#include "log.h"
#include "string_util.h"

namespace benchmark {

// Suppress unused warnings on helper functions.
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
#if defined(__NVCOMPILER)
#pragma diag_suppress declared_but_not_referenced
#endif

namespace {
#if defined(BENCHMARK_OS_WINDOWS)
// Sums kernel and user FILETIME values and scales the 100 ns ticks to
// seconds.
double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
  ULARGE_INTEGER kernel;
  ULARGE_INTEGER user;
  kernel.HighPart = kernel_time.dwHighDateTime;
  kernel.LowPart = kernel_time.dwLowDateTime;
  user.HighPart = user_time.dwHighDateTime;
  user.LowPart = user_time.dwLowDateTime;
  return (static_cast<double>(kernel.QuadPart) +
          static_cast<double>(user.QuadPart)) *
         1e-7;
}
#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) && \
    !defined(BENCHMARK_OS_WASI)
// User + system time of a getrusage() result, in seconds.
double MakeTime(struct rusage const& ru) {
  return (static_cast<double>(ru.ru_utime.tv_sec) +
          static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
          static_cast<double>(ru.ru_stime.tv_sec) +
          static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
}
#endif
#if defined(BENCHMARK_OS_MACOSX)
// User + system time of a Mach thread_basic_info record, in seconds.
double MakeTime(thread_basic_info_data_t const& info) {
  return (static_cast<double>(info.user_time.seconds) +
          static_cast<double>(info.user_time.microseconds) * 1e-6 +
          static_cast<double>(info.system_time.seconds) +
          static_cast<double>(info.system_time.microseconds) * 1e-6);
}
#endif
#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID)
// Converts a timespec to seconds.
double MakeTime(struct timespec const& ts) {
  return static_cast<double>(ts.tv_sec) +
         (static_cast<double>(ts.tv_nsec) * 1e-9);
}
#endif

// Prints "ERROR: <msg>" to std::cerr and terminates with EXIT_FAILURE.
BENCHMARK_NORETURN void DiagnoseAndExit(const char* msg) {
  std::cerr << "ERROR: " << msg << '\n';
  std::flush(std::cerr);
  std::exit(EXIT_FAILURE);
}

}  // end namespace

// Returns the CPU time consumed by the current process, in seconds.
// Terminates the process if the underlying OS query fails.
double ProcessCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
  HANDLE proc = GetCurrentProcess();
  FILETIME creation_time;
  FILETIME exit_time;
  FILETIME kernel_time;
  FILETIME user_time;
  if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time,
                      &user_time))
    return MakeTime(kernel_time, user_time);
  DiagnoseAndExit("GetProcessTimes() failed");
#elif defined(BENCHMARK_OS_QURT)
  // Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0,
  // and doesn't appear to work on at least some devices (eg Samsung S22),
  // so let's use the actually-documented and apparently-equivalent
  // qurt_sysclock_get_hw_ticks() call instead.
  return static_cast<double>(
             qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) *
         1.0e-6;
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
  // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
  // Use Emscripten-specific API. Reported CPU time would be exactly the
  // same as total time, but this is ok because there aren't long-latency
  // synchronous system calls in Emscripten.
  return emscripten_get_now() * 1e-3;
#elif defined(BENCHMARK_OS_WASI)
  // WASI lacks CLOCK_PROCESS_CPUTIME_ID and getrusage; use monotonic clock.
  struct timespec ts {};
  if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
    return static_cast<double>(ts.tv_sec) +
           (static_cast<double>(ts.tv_nsec) * 1e-9);
  }
  DiagnoseAndExit("clock_gettime(CLOCK_MONOTONIC, ...) failed");
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
  // FIXME We want to use clock_gettime, but it's not available in MacOS 10.11.
  // See https://github.com/google/benchmark/pull/292
  struct timespec spec {};
  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) {
    return MakeTime(spec);
  }
  DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else
  struct rusage ru;
  if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru);
  DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed");
#endif
}

// Returns the CPU time consumed by the calling thread, in seconds. Platforms
// without a per-thread clock fall back to ProcessCPUUsage(). Terminates the
// process if the underlying OS query fails.
double ThreadCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
  HANDLE this_thread = GetCurrentThread();
  FILETIME creation_time;
  FILETIME exit_time;
  FILETIME kernel_time;
  FILETIME user_time;
  // On failure the FILETIME outputs are left uninitialised, so bail out
  // instead of converting garbage (mirrors ProcessCPUUsage()).
  if (GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
                     &user_time))
    return MakeTime(kernel_time, user_time);
  DiagnoseAndExit("GetThreadTimes() failed");
#elif defined(BENCHMARK_OS_QURT)
  // Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0,
  // and doesn't appear to work on at least some devices (eg Samsung S22),
  // so let's use the actually-documented and apparently-equivalent
  // qurt_sysclock_get_hw_ticks() call instead.
  return static_cast<double>(
             qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) *
         1.0e-6;
#elif defined(BENCHMARK_OS_MACOSX)
  // FIXME We want to use clock_gettime, but it's not available in MacOS 10.11.
  // See https://github.com/google/benchmark/pull/292
  mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
  thread_basic_info_data_t info;
  mach_port_t thread = pthread_mach_thread_np(pthread_self());
  if (thread_info(thread, THREAD_BASIC_INFO,
                  reinterpret_cast<thread_info_t>(&info),
                  &count) == KERN_SUCCESS) {
    return MakeTime(info);
  }
  DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
  // Emscripten doesn't support traditional threads
  return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_RTEMS)
  // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. See
  // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c
  return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_ZOS)
  // z/OS doesn't support CLOCK_THREAD_CPUTIME_ID.
  return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_WASI)
  // WASI doesn't support per-thread CPU timing; fall back to process time.
  return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_SOLARIS)
  struct rusage ru;
  if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru);
  DiagnoseAndExit("getrusage(RUSAGE_LWP, ...) failed");
#elif defined(CLOCK_THREAD_CPUTIME_ID)
  struct timespec ts {};
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) {
    return MakeTime(ts);
  }
  DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#else
#error Per-thread timing is not available on your system.
#endif
}

// Returns the current local time formatted per RFC3339. When the local UTC
// offset cannot be determined, UTC time with a "-00:00" offset is returned
// instead.
std::string LocalDateTimeString() {
  // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM.
  typedef std::chrono::system_clock Clock;
  std::time_t now = Clock::to_time_t(Clock::now());
  const std::size_t kTzOffsetLen = 6;
  const std::size_t kTimestampLen = 19;

  std::size_t tz_len = 0;
  std::size_t timestamp_len = 0;
  long int offset_minutes = 0;
  char tz_offset_sign = '+';
  // tz_offset is set in one of three ways:
  // * strftime with %z - This either returns empty or the ISO 8601 time.  The
  // maximum length an
  //   ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero).
  // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to
  // 19 for %02li,
  //   one for :, up to 19 %02li, plus trailing zero).
  // * A fixed string of "-00:00".  The maximum length is 7 (-00:00, plus
  // trailing zero).
  //
  // Thus, the maximum size this needs to be is 41.
  char tz_offset[41];
  // Long enough buffer to avoid format-overflow warnings
  char storage[128];

#if defined(BENCHMARK_OS_WINDOWS)
  std::tm* timeinfo_p = ::localtime(&now);
#else
  std::tm timeinfo{};
  std::tm* timeinfo_p = &timeinfo;
  ::localtime_r(&now, &timeinfo);
#endif

  tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p);

  if (tz_len < kTzOffsetLen && tz_len > 1) {
    // Timezone offset was written. strftime writes offset as +HHMM or -HHMM,
    // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse
    // the offset as an integer, then reprint it to a string.

    // Note: despite its name, `offset_minutes` holds the decimal HHMM value
    // (e.g. 530 for +0530); it is split with / 100 and % 100 below.
    offset_minutes = ::strtol(tz_offset, NULL, 10);
    if (offset_minutes < 0) {
      offset_minutes *= -1;
      tz_offset_sign = '-';
    }

    tz_len = static_cast<std::size_t>(
        ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li",
                   tz_offset_sign, offset_minutes / 100, offset_minutes % 100));
    BM_CHECK(tz_len == kTzOffsetLen);
    ((void)tz_len);  // Prevent unused variable warning in optimized build.
  } else {
    // Unknown offset. RFC3339 specifies that unknown local offsets should be
    // written as UTC time with -00:00 timezone.
#if defined(BENCHMARK_OS_WINDOWS)
    // Potential race condition if another thread calls localtime or gmtime.
    timeinfo_p = ::gmtime(&now);
#else
    ::gmtime_r(&now, &timeinfo);
#endif

    strncpy(tz_offset, "-00:00", kTzOffsetLen + 1);
  }

  timestamp_len =
      std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p);
  BM_CHECK(timestamp_len == kTimestampLen);
  // Prevent unused variable warning in optimized build.
  ((void)kTimestampLen);

  std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1);
  return std::string(storage);
}

}  // end namespace benchmark

================================================
FILE: src/timers.h
================================================
#ifndef BENCHMARK_TIMERS_H
#define BENCHMARK_TIMERS_H

#include <chrono>
#include <string>

namespace benchmark {

// Return the CPU usage of the current process
double ProcessCPUUsage();

// Return the CPU usage of the children of the current process
double ChildrenCPUUsage();

// Return the CPU usage of the current thread
double ThreadCPUUsage();

#if defined(BENCHMARK_OS_QURT)

// std::chrono::now() can return 0 on some Hexagon devices;
// this reads the value of a 56-bit, 19.2MHz hardware counter
// and converts it to seconds. Unlike std::chrono, this doesn't
// return an absolute time, but since ChronoClockNow() is only used
// to compute elapsed time, this shouldn't matter.
struct QuRTClock { typedef uint64_t rep; typedef std::ratio<1, 19200000> period; typedef std::chrono::duration duration; typedef std::chrono::time_point time_point; static const bool is_steady = false; static time_point now() { unsigned long long count; asm volatile(" %0 = c31:30 " : "=r"(count)); return time_point(static_cast(count)); } }; #else #if defined(HAVE_STEADY_CLOCK) template struct ChooseSteadyClock { typedef std::chrono::high_resolution_clock type; }; template <> struct ChooseSteadyClock { typedef std::chrono::steady_clock type; }; #endif // HAVE_STEADY_CLOCK #endif struct ChooseClockType { #if defined(BENCHMARK_OS_QURT) typedef QuRTClock type; #elif defined(HAVE_STEADY_CLOCK) typedef ChooseSteadyClock<>::type type; #else typedef std::chrono::high_resolution_clock type; #endif }; inline double ChronoClockNow() { typedef ChooseClockType::type ClockType; using FpSeconds = std::chrono::duration; return FpSeconds(ClockType::now().time_since_epoch()).count(); } std::string LocalDateTimeString(); } // end namespace benchmark #endif // BENCHMARK_TIMERS_H ================================================ FILE: test/AssemblyTests.cmake ================================================ set(CLANG_SUPPORTED_VERSION "5.0.0") set(GCC_SUPPORTED_VERSION "5.5.0") if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION}) message (WARNING "Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION} ". Expected is " ${CLANG_SUPPORTED_VERSION} ". Assembly tests may be broken.") endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION}) message (WARNING "Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION} ". Expected is " ${GCC_SUPPORTED_VERSION} ". Assembly tests may be broken.") endif() else() message (WARNING "Unsupported compiler. 
Assembly tests may be broken.") endif() include(split_list) set(ASM_TEST_FLAGS "") check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) if (BENCHMARK_HAS_O3_FLAG) list(APPEND ASM_TEST_FLAGS -O3) endif() check_cxx_compiler_flag(-g0 BENCHMARK_HAS_G0_FLAG) if (BENCHMARK_HAS_G0_FLAG) list(APPEND ASM_TEST_FLAGS -g0) endif() check_cxx_compiler_flag(-fno-stack-protector BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG) if (BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG) list(APPEND ASM_TEST_FLAGS -fno-stack-protector) endif() split_list(ASM_TEST_FLAGS) string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER) macro(add_filecheck_test name) cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV}) add_library(${name} OBJECT ${name}.cc) target_link_libraries(${name} PRIVATE benchmark::benchmark) set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}") set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s") add_custom_target(copy_${name} ALL COMMAND ${PROJECT_SOURCE_DIR}/tools/strip_asm.py $ ${ASM_OUTPUT_FILE} BYPRODUCTS ${ASM_OUTPUT_FILE}) add_dependencies(copy_${name} ${name}) if (NOT ARG_CHECK_PREFIXES) set(ARG_CHECK_PREFIXES "CHECK") endif() foreach(prefix ${ARG_CHECK_PREFIXES}) add_test(NAME run_${name}_${prefix} COMMAND ${LLVM_FILECHECK_EXE} ${name}.cc --input-file=${ASM_OUTPUT_FILE} --check-prefixes=CHECK,CHECK-${ASM_TEST_COMPILER} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endforeach() endmacro() ================================================ FILE: test/BUILD ================================================ load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") platform( name = "windows", constraint_values = [ "@platforms//os:windows", ], ) TEST_COPTS = [ "-pedantic", "-pedantic-errors", "-std=c++17", "-Wall", "-Wconversion", "-Wextra", "-Wshadow", # "-Wshorten-64-to-32", "-Wfloat-equal", "-fstrict-aliasing", ## assert() are used a lot in tests upstream, which may be optimised out leading to ## unused-variable warning. 
"-Wno-unused-variable", "-Werror=old-style-cast", ] TEST_MSVC_OPTS = [ "/std:c++17", ] # Some of the issues with DoNotOptimize only occur when optimization is enabled PER_SRC_COPTS = { "donotoptimize_test.cc": ["-O3"], } TEST_ARGS = ["--benchmark_min_time=0.01s"] PER_SRC_TEST_ARGS = { "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], "repetitions_test.cc": [" --benchmark_repetitions=3"], "spec_arg_test.cc": ["--benchmark_filter=BM_NotChosen"], "spec_arg_verbosity_test.cc": ["--v=42"], "complexity_test.cc": ["--benchmark_min_time=1000000x"], } cc_library( name = "output_test_helper", testonly = 1, srcs = ["output_test_helper.cc"], hdrs = ["output_test.h"], copts = select({ "//:windows": TEST_MSVC_OPTS, "//conditions:default": TEST_COPTS, }), deps = [ "//:benchmark", "//:benchmark_internal_headers", ], ) # Tests that use gtest. These rely on `gtest_main`. [ cc_test( name = test_src[:-len(".cc")], size = "small", srcs = [test_src], copts = select({ "//:windows": TEST_MSVC_OPTS, "//conditions:default": TEST_COPTS, }) + PER_SRC_COPTS.get(test_src, []), deps = [ "//:benchmark", "//:benchmark_internal_headers", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], ) for test_src in glob(["*_gtest.cc"]) ] # Tests that do not use gtest. These have their own `main` defined. [ cc_test( name = test_src[:-len(".cc")], size = "small", srcs = [test_src], args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []), copts = select({ "//:windows": TEST_MSVC_OPTS, "//conditions:default": TEST_COPTS, }) + PER_SRC_COPTS.get(test_src, []), deps = [ ":output_test_helper", "//:benchmark", "//:benchmark_internal_headers", ], # FIXME: Add support for assembly tests to bazel. 
# See Issue #556 # https://github.com/google/benchmark/issues/556 ) for test_src in glob( ["*_test.cc"], exclude = [ "*_assembly_test.cc", "cxx11_test.cc", "link_main_test.cc", ], ) ] cc_test( name = "cxx11_test", size = "small", srcs = ["cxx11_test.cc"], copts = TEST_COPTS + ["-std=c++11"], target_compatible_with = select({ "//:windows": ["@platforms//:incompatible"], "//conditions:default": [], }), deps = [ ":output_test_helper", "//:benchmark_main", ], ) cc_test( name = "link_main_test", size = "small", srcs = ["link_main_test.cc"], copts = select({ "//:windows": TEST_MSVC_OPTS, "//conditions:default": TEST_COPTS, }), deps = ["//:benchmark_main"], ) ================================================ FILE: test/CMakeLists.txt ================================================ #Enable the tests set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include(CheckCXXCompilerFlag) add_cxx_compiler_flag(-Wno-unused-variable) # NOTE: Some tests use `` to perform the test. Therefore we must # strip -DNDEBUG from the default CMake flags in DEBUG mode. string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) add_definitions( -UNDEBUG ) add_definitions(-DTEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines. 
foreach (flags_var_to_scrub CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_C_FLAGS_MINSIZEREL) string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " " "${flags_var_to_scrub}" "${${flags_var_to_scrub}}") endforeach() endif() if (NOT BUILD_SHARED_LIBS) add_definitions(-DBENCHMARK_STATIC_DEFINE) endif() check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) set(BENCHMARK_O3_FLAG "") if (BENCHMARK_HAS_O3_FLAG) set(BENCHMARK_O3_FLAG "-O3") endif() # NOTE: These flags must be added after find_package(Threads REQUIRED) otherwise # they will break the configuration check. if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) list(APPEND CMAKE_EXE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) endif() add_library(output_test_helper STATIC output_test_helper.cc output_test.h) target_link_libraries(output_test_helper PRIVATE benchmark::benchmark) macro(compile_benchmark_test name) add_executable(${name} "${name}.cc") target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC") target_compile_options( ${name} PRIVATE --diag_suppress partial_override ) endif() endmacro(compile_benchmark_test) macro(compile_benchmark_test_with_main name) add_executable(${name} "${name}.cc") target_link_libraries(${name} benchmark::benchmark_main) endmacro(compile_benchmark_test_with_main) macro(compile_output_test name) add_executable(${name} "${name}.cc" output_test.h) target_link_libraries(${name} output_test_helper benchmark::benchmark_main ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_output_test) macro(benchmark_add_test) add_test(${ARGV}) if(WIN32 AND BUILD_SHARED_LIBS) cmake_parse_arguments(TEST "" "NAME" "" ${ARGN}) set_tests_properties(${TEST_NAME} PROPERTIES ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$") endif() endmacro(benchmark_add_test) # Demonstration executable compile_benchmark_test_with_main(cxx11_test) if(DEFINED 
MSVC) # MSVC does not really support C++11. set_property(TARGET cxx11_test PROPERTY CXX_STANDARD 14) else() set_property(TARGET cxx11_test PROPERTY CXX_STANDARD 11) endif() set_property(TARGET cxx11_test PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET cxx11_test PROPERTY CXX_EXTENSIONS OFF) benchmark_add_test(NAME cxx11_test COMMAND cxx11_test --benchmark_min_time=0.01s) compile_benchmark_test(benchmark_test) benchmark_add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(spec_arg_test) benchmark_add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) compile_benchmark_test(spec_arg_verbosity_test) benchmark_add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42) compile_benchmark_test(benchmark_setup_teardown_test) benchmark_add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) benchmark_add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect}) benchmark_add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) endmacro(add_filter_test) compile_benchmark_test(benchmark_min_time_flag_time_test) benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test) compile_benchmark_test(benchmark_min_time_flag_iters_test) benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) add_filter_test(filter_suffix_negative "-BM_.*" 1) add_filter_test(filter_regex_all ".*" 5) add_filter_test(filter_regex_all_negative "-.*" 0) add_filter_test(filter_regex_blank "" 5) add_filter_test(filter_regex_blank_negative "-" 0) add_filter_test(filter_regex_none "monkey" 0) add_filter_test(filter_regex_none_negative 
"-monkey" 5) add_filter_test(filter_regex_wildcard ".*Foo.*" 3) add_filter_test(filter_regex_wildcard_negative "-.*Foo.*" 2) add_filter_test(filter_regex_begin "^BM_.*" 4) add_filter_test(filter_regex_begin_negative "-^BM_.*" 1) add_filter_test(filter_regex_begin2 "^N" 1) add_filter_test(filter_regex_begin2_negative "-^N" 4) add_filter_test(filter_regex_end ".*Ba$" 1) add_filter_test(filter_regex_end_negative "-.*Ba$" 4) compile_benchmark_test(options_test) benchmark_add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s) compile_benchmark_test(basic_test) benchmark_add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s) compile_output_test(repetitions_test) benchmark_add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3) compile_benchmark_test(diagnostics_test) benchmark_add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s) compile_benchmark_test(skip_with_error_test) benchmark_add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s) compile_benchmark_test(donotoptimize_test) # Enable errors for deprecated deprecations (DoNotOptimize(Tp const& value)). 
check_cxx_compiler_flag(-Werror=deprecated-declarations BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG) if (BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG) target_compile_options (donotoptimize_test PRIVATE "-Werror=deprecated-declarations") endif() # Some of the issues with DoNotOptimize only occur when optimization is enabled check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) if (BENCHMARK_HAS_O3_FLAG) set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3") endif() benchmark_add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s) compile_benchmark_test(fixture_test) benchmark_add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s) compile_benchmark_test(register_benchmark_test) benchmark_add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(map_test) benchmark_add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s) compile_benchmark_test(multiple_ranges_test) benchmark_add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s) compile_benchmark_test(args_product_test) benchmark_add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s) compile_benchmark_test_with_main(link_main_test) benchmark_add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s) compile_output_test(reporter_output_test) benchmark_add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s) compile_output_test(templated_fixture_test) benchmark_add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s) compile_output_test(templated_fixture_method_test) benchmark_add_test(NAME templated_fixture_method_test COMMAND templated_fixture_method_test --benchmark_min_time=0.01s) compile_output_test(user_counters_test) benchmark_add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.2s) 
compile_output_test(user_counters_threads_test) benchmark_add_test(NAME user_counters_threads_test COMMAND user_counters_threads_test --benchmark_min_time=0.2s) compile_output_test(perf_counters_test) benchmark_add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,INSTRUCTIONS) compile_output_test(internal_threading_test) benchmark_add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s) compile_output_test(manual_threading_test) benchmark_add_test(NAME manual_threading_test COMMAND manual_threading_test --benchmark_min_time=0.01s) compile_output_test(report_aggregates_only_test) benchmark_add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(display_aggregates_only_test) benchmark_add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(user_counters_tabular_test) benchmark_add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.2s) compile_output_test(user_counters_thousands_test) benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s) compile_output_test(memory_manager_test) benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) compile_output_test(profiler_manager_test) benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s) compile_benchmark_test(profiler_manager_iterations_test) benchmark_add_test(NAME profiler_manager_iterations COMMAND profiler_manager_iterations_test) compile_output_test(complexity_test) benchmark_add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=1000000x) compile_output_test(locale_impermeability_test) benchmark_add_test(NAME 
locale_impermeability_test COMMAND locale_impermeability_test) ############################################################################### # GoogleTest Unit Tests ############################################################################### if (BENCHMARK_ENABLE_GTEST_TESTS) macro(compile_gtest name) add_executable(${name} "${name}.cc") target_link_libraries(${name} benchmark::benchmark gmock_main ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_gtest) macro(add_gtest name) compile_gtest(${name}) benchmark_add_test(NAME ${name} COMMAND ${name}) if(WIN32 AND BUILD_SHARED_LIBS) set_tests_properties(${name} PROPERTIES ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$;PATH=path_list_prepend:$" ) endif() endmacro() add_gtest(benchmark_gtest) add_gtest(benchmark_name_gtest) add_gtest(benchmark_random_interleaving_gtest) add_gtest(commandlineflags_gtest) add_gtest(statistics_gtest) add_gtest(string_util_gtest) add_gtest(perf_counters_gtest) add_gtest(time_unit_gtest) add_gtest(min_time_parse_gtest) add_gtest(profiler_manager_gtest) add_gtest(benchmark_setup_teardown_cb_types_gtest) add_gtest(memory_results_gtest) endif(BENCHMARK_ENABLE_GTEST_TESTS) ############################################################################### # Assembly Unit Tests ############################################################################### if (BENCHMARK_ENABLE_ASSEMBLY_TESTS) if (NOT LLVM_FILECHECK_EXE) message(FATAL_ERROR "LLVM FileCheck is required when including this file") endif() include(AssemblyTests.cmake) add_filecheck_test(donotoptimize_assembly_test) add_filecheck_test(state_assembly_test) add_filecheck_test(clobber_memory_assembly_test) endif() ############################################################################### # Code Coverage Configuration ############################################################################### # Add the coverage command(s) if(CMAKE_BUILD_TYPE) string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) endif() if 
("${CMAKE_BUILD_TYPE_LOWER}" MATCHES "coverage")
  # The variable above is quoted on purpose: CMAKE_BUILD_TYPE_LOWER is only
  # set when CMAKE_BUILD_TYPE is non-empty, and an unquoted empty expansion
  # would leave `if( MATCHES "coverage")`, which is a configure-time error.
  find_program(GCOV gcov)
  find_program(LCOV lcov)
  find_program(GENHTML genhtml)
  find_program(CTEST ctest)
  if (GCOV AND LCOV AND GENHTML AND CTEST AND HAVE_CXX_FLAG_COVERAGE)
    # Zero the counters, capture a baseline, run the tests, capture again,
    # merge both captures, drop the test sources, then render the HTML report.
    add_custom_command(
      OUTPUT ${CMAKE_BINARY_DIR}/lcov/index.html
      COMMAND ${LCOV} -q -z -d .
      COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o before.lcov -i
      COMMAND ${CTEST} --force-new-ctest-process
      COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o after.lcov
      COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov
      COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov
      COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark
      DEPENDS filter_test benchmark_test options_test basic_test fixture_test complexity_test
      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
      COMMENT "Running LCOV"
    )
    add_custom_target(coverage
      DEPENDS ${CMAKE_BINARY_DIR}/lcov/index.html
      COMMENT "LCOV report at lcov/index.html"
    )
    message(STATUS "Coverage command added")
  else()
    # Report which of the required tools or flags is missing.
    if (HAVE_CXX_FLAG_COVERAGE)
      set(CXX_FLAG_COVERAGE_MESSAGE supported)
    else()
      set(CXX_FLAG_COVERAGE_MESSAGE unavailable)
    endif()
    message(WARNING
      "Coverage not available:\n"
      " gcov: ${GCOV}\n"
      " lcov: ${LCOV}\n"
      " genhtml: ${GENHTML}\n"
      " ctest: ${CTEST}\n"
      " --coverage flag: ${CXX_FLAG_COVERAGE_MESSAGE}")
  endif()
endif()

================================================
FILE: test/args_product_test.cc
================================================
#include <cassert>
#include <iostream>
#include <set>
#include <vector>

#include "benchmark/benchmark_api.h"
#include "benchmark/registration.h"
#include "benchmark/state.h"
#include "benchmark/utils.h"

class ArgsProductFixture : public ::benchmark::Fixture {
 public:
  ArgsProductFixture()
      : expectedValues({{0, 100, 2000, 30000},
                        {1, 15, 3, 8},
                        {1, 15, 3, 9},
                        {1, 15, 7, 8},
                        {1, 15, 7, 9},
                        {1, 15, 10, 8},
                        {1, 15, 10, 9},
                        {2, 15, 3, 8},
                        {2, 15, 3, 9},
                        {2, 15, 7, 8},
                        {2, 15, 7, 9},
                        {2, 15, 10, 8},
                        {2, 15,
10, 9}, {4, 5, 6, 11}}) {} void SetUp(const ::benchmark::State& state) override { std::vector ranges = {state.range(0), state.range(1), state.range(2), state.range(3)}; assert(expectedValues.find(ranges) != expectedValues.end()); actualValues.insert(ranges); } // NOTE: This is not TearDown as we want to check after _all_ runs are // complete. ~ArgsProductFixture() override { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; } std::cout << "}\n"; } std::cout << "ACTUAL\n"; for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; } std::cout << "}\n"; } } } std::set> expectedValues; std::set> actualValues; }; BENCHMARK_DEFINE_F(ArgsProductFixture, Empty)(benchmark::State& state) { for (auto _ : state) { int64_t product = state.range(0) * state.range(1) * state.range(2) * state.range(3); for (int64_t x = 0; x < product; x++) { benchmark::DoNotOptimize(x); } } } BENCHMARK_REGISTER_F(ArgsProductFixture, Empty) ->Args({0, 100, 2000, 30000}) ->ArgsProduct({{1, 2}, {15}, {3, 7, 10}, {8, 9}}) ->Args({4, 5, 6, 11}); BENCHMARK_MAIN(); ================================================ FILE: test/basic_test.cc ================================================ #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/types.h" #include "benchmark/utils.h" #define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) namespace { void BM_empty(benchmark::State& state) { for (auto _ : state) { auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); BENCHMARK(BM_empty)->ThreadPerCpu(); void BM_spin_empty(benchmark::State& state) { for (auto _ : state) { for (auto x = 0; x < state.range(0); ++x) { benchmark::DoNotOptimize(x); } } } BASIC_BENCHMARK_TEST(BM_spin_empty); 
BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();

// Spins state.range(0) times before the benchmark loop starts, then spins
// the same amount inside every iteration.
void BM_spin_pause_before(benchmark::State& state) {
  for (auto i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
  for (auto _ : state) {
    for (auto i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_before);
BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();

// Each iteration spins once between PauseTiming() and ResumeTiming() and
// once more after timing has been resumed.
void BM_spin_pause_during(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();
    for (auto i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
    state.ResumeTiming();
    for (auto i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_during);
BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu();

// Each iteration does nothing except pause and immediately resume timing.
void BM_pause_during(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();
    state.ResumeTiming();
  }
}
BENCHMARK(BM_pause_during);
BENCHMARK(BM_pause_during)->ThreadPerCpu();
BENCHMARK(BM_pause_during)->UseRealTime();
BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();

// Spins inside every iteration, then spins once more after the benchmark
// loop has finished.
void BM_spin_pause_after(benchmark::State& state) {
  for (auto _ : state) {
    for (auto i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
  for (auto i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_after);
BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();

// Combines the two cases above: spins before the loop, inside every
// iteration, and again after the loop.
void BM_spin_pause_before_and_after(benchmark::State& state) {
  for (auto i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
  for (auto _ : state) {
    for (auto i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
  for (auto i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after);
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu();

// Benchmark whose loop body is intentionally empty.
void BM_empty_stop_start(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_empty_stop_start);
BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();

// Counts the iterations driven by KeepRunning() and asserts that the count
// matches state.iterations() both before the loop (zero) and after it.
void BM_KeepRunning(benchmark::State& state) {
  benchmark::IterationCount iter_count = 0;
  assert(iter_count == state.iterations());
  while (state.KeepRunning()) {
    ++iter_count;
  }
  assert(iter_count == state.iterations());
}
BENCHMARK(BM_KeepRunning);

// Drives the benchmark in batches via KeepRunningBatch() and asserts that
// state.iterations() equals the sum of the batches that were run.
void BM_KeepRunningBatch(benchmark::State& state) {
  // Choose a batch size >1000 to skip the typical runs with iteration
  // targets of 10, 100 and 1000.  If these are not actually skipped the
  // bug would be detectable as consecutive runs with the same iteration
  // count.  Below we assert that this does not happen.
  const benchmark::IterationCount batch_size = 1009;

  // Function-local static: remembers the count of the previous invocation of
  // this benchmark function so consecutive runs can be compared.
  static benchmark::IterationCount prior_iter_count = 0;
  benchmark::IterationCount iter_count = 0;
  while (state.KeepRunningBatch(batch_size)) {
    iter_count += batch_size;
  }
  assert(state.iterations() == iter_count);

  // Verify that the iteration count always increases across runs (see
  // comment above).
  assert(iter_count == batch_size            // max_iterations == 1
         || iter_count > prior_iter_count);  // max_iterations > batch_size
  prior_iter_count = iter_count;
}
// Register with a fixed repetition count to establish the invariant that
// the iteration count should always change across runs.  This overrides
// the --benchmark_repetitions command line flag, which would otherwise
// cause this test to fail if set > 1.
BENCHMARK(BM_KeepRunningBatch)->Repetitions(1); void BM_RangedFor(benchmark::State& state) { benchmark::IterationCount iter_count = 0; for (auto _ : state) { ++iter_count; } assert(iter_count == state.max_iterations); } BENCHMARK(BM_RangedFor); template void BM_OneTemplateFunc(benchmark::State& state) { auto arg = state.range(0); T sum = 0; for (auto _ : state) { sum += static_cast(arg); } } BENCHMARK(BM_OneTemplateFunc)->Arg(1); BENCHMARK(BM_OneTemplateFunc)->Arg(1); template void BM_TwoTemplateFunc(benchmark::State& state) { auto arg = state.range(0); A sum = 0; B prod = 1; for (auto _ : state) { sum += static_cast(arg); prod *= static_cast(arg); } } BENCHMARK(BM_TwoTemplateFunc)->Arg(1); BENCHMARK(BM_TwoTemplateFunc)->Arg(1); // Ensure that StateIterator provides all the necessary typedefs required to // instantiate std::iterator_traits. static_assert( std::is_same::value_type, typename benchmark::State::StateIterator::value_type>::value, ""); } // end namespace BENCHMARK_MAIN(); ================================================ FILE: test/benchmark_gtest.cc ================================================ #include #include #include #include "../src/benchmark_register.h" #include "benchmark/benchmark_api.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { namespace internal { namespace { TEST(AddRangeTest, Simple) { std::vector dst; AddRange(&dst, 1, 2, 2); EXPECT_THAT(dst, testing::ElementsAre(1, 2)); } TEST(AddRangeTest, Simple64) { std::vector dst; AddRange(&dst, static_cast(1), static_cast(2), 2); EXPECT_THAT(dst, testing::ElementsAre(1, 2)); } TEST(AddRangeTest, Advanced) { std::vector dst; AddRange(&dst, 5, 15, 2); EXPECT_THAT(dst, testing::ElementsAre(5, 8, 15)); } TEST(AddRangeTest, Advanced64) { std::vector dst; AddRange(&dst, static_cast(5), static_cast(15), 2); EXPECT_THAT(dst, testing::ElementsAre(5, 8, 15)); } TEST(AddRangeTest, FullRange8) { std::vector dst; AddRange(&dst, int8_t{1}, std::numeric_limits::max(), 8); 
EXPECT_THAT( dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127})); } TEST(AddRangeTest, FullRange64) { std::vector dst; AddRange(&dst, int64_t{1}, std::numeric_limits::max(), 1024); EXPECT_THAT( dst, testing::ElementsAre(1LL, 1024LL, 1048576LL, 1073741824LL, 1099511627776LL, 1125899906842624LL, 1152921504606846976LL, 9223372036854775807LL)); } TEST(AddRangeTest, NegativeRanges) { std::vector dst; AddRange(&dst, -8, 0, 2); EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1, 0)); } TEST(AddRangeTest, StrictlyNegative) { std::vector dst; AddRange(&dst, -8, -1, 2); EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1)); } TEST(AddRangeTest, SymmetricNegativeRanges) { std::vector dst; AddRange(&dst, -8, 8, 2); EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1, 0, 1, 2, 4, 8)); } TEST(AddRangeTest, SymmetricNegativeRangesOddMult) { std::vector dst; AddRange(&dst, -30, 32, 5); EXPECT_THAT(dst, testing::ElementsAre(-30, -25, -5, -1, 0, 1, 5, 25, 32)); } TEST(AddRangeTest, NegativeRangesAsymmetric) { std::vector dst; AddRange(&dst, -3, 5, 2); EXPECT_THAT(dst, testing::ElementsAre(-3, -2, -1, 0, 1, 2, 4, 5)); } TEST(AddRangeTest, NegativeRangesLargeStep) { // Always include -1, 0, 1 when crossing zero. std::vector dst; AddRange(&dst, -8, 8, 10); EXPECT_THAT(dst, testing::ElementsAre(-8, -1, 0, 1, 8)); } TEST(AddRangeTest, ZeroOnlyRange) { std::vector dst; AddRange(&dst, 0, 0, 2); EXPECT_THAT(dst, testing::ElementsAre(0)); } TEST(AddRangeTest, ZeroStartingRange) { std::vector dst; AddRange(&dst, 0, 2, 2); EXPECT_THAT(dst, testing::ElementsAre(0, 1, 2)); } TEST(AddRangeTest, NegativeRange64) { std::vector dst; AddRange(&dst, -4, 4, 2); EXPECT_THAT(dst, testing::ElementsAre(-4, -2, -1, 0, 1, 2, 4)); } TEST(AddRangeTest, NegativeRangePreservesExistingOrder) { // If elements already exist in the range, ensure we don't change // their ordering by adding negative values. 
std::vector dst = {1, 2, 3}; AddRange(&dst, -2, 2, 2); EXPECT_THAT(dst, testing::ElementsAre(1, 2, 3, -2, -1, 0, 1, 2)); } TEST(AddRangeTest, FullNegativeRange64) { std::vector dst; const auto min = std::numeric_limits::min(); const auto max = std::numeric_limits::max(); AddRange(&dst, min, max, 1024); EXPECT_THAT( dst, testing::ElementsAreArray(std::vector{ min, -1152921504606846976LL, -1125899906842624LL, -1099511627776LL, -1073741824LL, -1048576LL, -1024LL, -1LL, 0LL, 1LL, 1024LL, 1048576LL, 1073741824LL, 1099511627776LL, 1125899906842624LL, 1152921504606846976LL, max})); } TEST(AddRangeTest, Simple8) { std::vector dst; AddRange(&dst, int8_t{1}, int8_t{8}, int8_t{2}); EXPECT_THAT(dst, testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8})); } TEST(AddCustomContext, Simple) { std::map*& global_context = GetGlobalContext(); EXPECT_THAT(global_context, nullptr); AddCustomContext("foo", "bar"); AddCustomContext("baz", "qux"); EXPECT_THAT(*global_context, testing::UnorderedElementsAre(testing::Pair("foo", "bar"), testing::Pair("baz", "qux"))); delete global_context; global_context = nullptr; } TEST(AddCustomContext, DuplicateKey) { std::map*& global_context = GetGlobalContext(); EXPECT_THAT(global_context, nullptr); AddCustomContext("foo", "bar"); AddCustomContext("foo", "qux"); EXPECT_THAT(*global_context, testing::UnorderedElementsAre(testing::Pair("foo", "bar"))); delete global_context; global_context = nullptr; } } // namespace } // namespace internal } // namespace benchmark ================================================ FILE: test/benchmark_min_time_flag_iters_test.cc ================================================ #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" // Tests that we can specify the number of iterations with // --benchmark_min_time=x. 
namespace { class TestReporter : public benchmark::ConsoleReporter { public: bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; void ReportRuns(const std::vector& report) override { assert(report.size() == 1); iter_nums_.push_back(report[0].iterations); ConsoleReporter::ReportRuns(report); }; TestReporter() {} ~TestReporter() override {} const std::vector& GetIters() const { return iter_nums_; } private: std::vector iter_nums_; }; void BM_MyBench(benchmark::State& state) { for (auto s : state) { } } } // end namespace BENCHMARK(BM_MyBench); int main(int argc, char** argv) { benchmark::MaybeReenterWithoutASLR(argc, argv); // Make a fake argv and append the new --benchmark_min_time= to it. int fake_argc = argc + 1; std::vector fake_argv(static_cast(fake_argc)); for (size_t i = 0; i < static_cast(argc); ++i) { fake_argv[i] = argv[i]; } fake_argv[static_cast(argc)] = "--benchmark_min_time=4x"; benchmark::Initialize(&fake_argc, const_cast(fake_argv.data())); TestReporter test_reporter; const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); assert(returned_count == 1); // Check the executed iters. 
const std::vector iters = test_reporter.GetIters(); assert(!iters.empty() && iters[0] == 4); return 0; } ================================================ FILE: test/benchmark_min_time_flag_time_test.cc ================================================ #include #include #include #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" // Tests that we can specify the min time with // --benchmark_min_time=<NUM> (no suffix needed) OR // --benchmark_min_time=<NUM>s namespace { // This is from benchmark.h typedef int64_t IterationCount; class TestReporter : public benchmark::ConsoleReporter { public: bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; void ReportRuns(const std::vector& report) override { assert(report.size() == 1); ConsoleReporter::ReportRuns(report); }; void ReportRunsConfig(double min_time, bool /* has_explicit_iters */, IterationCount /* iters */) override { min_times_.push_back(min_time); } TestReporter() {} ~TestReporter() override {} const std::vector& GetMinTimes() const { return min_times_; } private: std::vector min_times_; }; bool AlmostEqual(double a, double b) { return std::fabs(a - b) < std::numeric_limits::epsilon(); } void DoTestHelper(int* argc, const char** argv, double expected) { benchmark::Initialize(argc, const_cast(argv)); TestReporter test_reporter; const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); assert(returned_count == 1); // Check the min_time const std::vector& min_times = test_reporter.GetMinTimes(); assert(!min_times.empty() && AlmostEqual(min_times[0], expected)); } void BM_MyBench(benchmark::State& state) { for (auto s : state) { } } BENCHMARK(BM_MyBench); } // end namespace int main(int argc, char** argv) { benchmark::MaybeReenterWithoutASLR(argc, argv); // Make a fake argv and append the new --benchmark_min_time=<NUM>[s]
to it. int fake_argc = argc + 1; std::vector fake_argv(static_cast(fake_argc)); for (size_t i = 0; i < static_cast(argc); ++i) { fake_argv[i] = argv[i]; } const char* no_suffix = "--benchmark_min_time=4"; const char* with_suffix = "--benchmark_min_time=4.0s"; double expected = 4.0; fake_argv[static_cast(argc)] = no_suffix; DoTestHelper(&fake_argc, fake_argv.data(), expected); fake_argv[static_cast(argc)] = with_suffix; DoTestHelper(&fake_argc, fake_argv.data(), expected); return 0; } ================================================ FILE: test/benchmark_name_gtest.cc ================================================ #include "benchmark/reporter.h" #include "gtest/gtest.h" namespace { using namespace benchmark; using namespace benchmark::internal; TEST(BenchmarkNameTest, Empty) { const auto name = BenchmarkName(); EXPECT_EQ(name.str(), std::string()); } TEST(BenchmarkNameTest, FunctionName) { auto name = BenchmarkName(); name.function_name = "function_name"; EXPECT_EQ(name.str(), "function_name"); } TEST(BenchmarkNameTest, FunctionNameAndArgs) { auto name = BenchmarkName(); name.function_name = "function_name"; name.args = "some_args:3/4/5"; EXPECT_EQ(name.str(), "function_name/some_args:3/4/5"); } TEST(BenchmarkNameTest, MinTime) { auto name = BenchmarkName(); name.function_name = "function_name"; name.args = "some_args:3/4"; name.min_time = "min_time:3.4s"; EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s"); } TEST(BenchmarkNameTest, MinWarmUpTime) { auto name = BenchmarkName(); name.function_name = "function_name"; name.args = "some_args:3/4"; name.min_warmup_time = "min_warmup_time:3.5s"; EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s"); } TEST(BenchmarkNameTest, Iterations) { auto name = BenchmarkName(); name.function_name = "function_name"; name.min_time = "min_time:3.4s"; name.iterations = "iterations:42"; EXPECT_EQ(name.str(), "function_name/min_time:3.4s/iterations:42"); } TEST(BenchmarkNameTest, Repetitions) { auto 
name = BenchmarkName(); name.function_name = "function_name"; name.min_time = "min_time:3.4s"; name.repetitions = "repetitions:24"; EXPECT_EQ(name.str(), "function_name/min_time:3.4s/repetitions:24"); } TEST(BenchmarkNameTest, TimeType) { auto name = BenchmarkName(); name.function_name = "function_name"; name.min_time = "min_time:3.4s"; name.time_type = "hammer_time"; EXPECT_EQ(name.str(), "function_name/min_time:3.4s/hammer_time"); } TEST(BenchmarkNameTest, Threads) { auto name = BenchmarkName(); name.function_name = "function_name"; name.min_time = "min_time:3.4s"; name.threads = "threads:256"; EXPECT_EQ(name.str(), "function_name/min_time:3.4s/threads:256"); } TEST(BenchmarkNameTest, TestEmptyFunctionName) { auto name = BenchmarkName(); name.args = "first:3/second:4"; name.threads = "threads:22"; EXPECT_EQ(name.str(), "first:3/second:4/threads:22"); } } // end namespace ================================================ FILE: test/benchmark_random_interleaving_gtest.cc ================================================ #include #include #include #include "../src/commandlineflags.h" #include "../src/string_util.h" #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { BM_DECLARE_bool(benchmark_enable_random_interleaving); BM_DECLARE_string(benchmark_filter); BM_DECLARE_int32(benchmark_repetitions); namespace internal { namespace { class EventQueue : public std::queue { public: void Put(const std::string& event) { push(event); } void Clear() { while (!empty()) { pop(); } } std::string Get() { std::string event = front(); pop(); return event; } }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) EventQueue* const queue = new EventQueue(); class NullReporter : public BenchmarkReporter { public: bool ReportContext(const Context& /*context*/) override { return true; } void ReportRuns(const std::vector& 
/* report */) override {} }; class BenchmarkTest : public testing::Test { public: static void SetupHook(int /* num_threads */) { queue->push("Setup"); } static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } static void Execute(const std::string& pattern) { queue->Clear(); std::unique_ptr reporter(new NullReporter()); FLAGS_benchmark_filter = pattern; RunSpecifiedBenchmarks(reporter.get()); queue->Put("DONE"); // End marker } }; void BM_Match1(benchmark::State& state) { const int64_t arg = state.range(0); for (auto _ : state) { } queue->Put(StrFormat("BM_Match1/%d", static_cast(arg))); } BENCHMARK(BM_Match1) ->Iterations(100) ->Arg(1) ->Arg(2) ->Arg(3) ->Range(10, 80) ->Args({90}) ->Args({100}); TEST_F(BenchmarkTest, Match1) { Execute("BM_Match1"); ASSERT_EQ("BM_Match1/1", queue->Get()); ASSERT_EQ("BM_Match1/2", queue->Get()); ASSERT_EQ("BM_Match1/3", queue->Get()); ASSERT_EQ("BM_Match1/10", queue->Get()); ASSERT_EQ("BM_Match1/64", queue->Get()); ASSERT_EQ("BM_Match1/80", queue->Get()); ASSERT_EQ("BM_Match1/90", queue->Get()); ASSERT_EQ("BM_Match1/100", queue->Get()); ASSERT_EQ("DONE", queue->Get()); } TEST_F(BenchmarkTest, Match1WithRepetition) { FLAGS_benchmark_repetitions = 2; Execute("BM_Match1/(64|80)"); ASSERT_EQ("BM_Match1/64", queue->Get()); ASSERT_EQ("BM_Match1/64", queue->Get()); ASSERT_EQ("BM_Match1/80", queue->Get()); ASSERT_EQ("BM_Match1/80", queue->Get()); ASSERT_EQ("DONE", queue->Get()); } TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { FLAGS_benchmark_enable_random_interleaving = true; FLAGS_benchmark_repetitions = 100; std::map element_count; std::map interleaving_count; Execute("BM_Match1/(64|80)"); for (int i = 0; i < 100; ++i) { std::vector interleaving; interleaving.push_back(queue->Get()); interleaving.push_back(queue->Get()); element_count[interleaving[0]]++; element_count[interleaving[1]]++; interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), interleaving[1].c_str())]++; } 
EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions."; EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions."; EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; ASSERT_EQ("DONE", queue->Get()); } } // namespace } // namespace internal } // namespace benchmark ================================================ FILE: test/benchmark_setup_teardown_cb_types_gtest.cc ================================================ #include "benchmark/benchmark_api.h" #include "benchmark/reporter.h" #include "benchmark/state.h" #include "benchmark/types.h" #include "gtest/gtest.h" using benchmark::Benchmark; using benchmark::BenchmarkReporter; using benchmark::callback_function; using benchmark::ClearRegisteredBenchmarks; using benchmark::RegisterBenchmark; using benchmark::RunSpecifiedBenchmarks; using benchmark::State; static int functor_called = 0; struct Functor { void operator()(const benchmark::State& /*unused*/) { functor_called++; } }; class NullReporter : public BenchmarkReporter { public: bool ReportContext(const Context& /*context*/) override { return true; } void ReportRuns(const std::vector& /* report */) override {} }; class BenchmarkTest : public testing::Test { public: Benchmark* bm; NullReporter null_reporter; int setup_calls; int teardown_calls; void SetUp() override { setup_calls = 0; teardown_calls = 0; functor_called = 0; bm = RegisterBenchmark("BM", [](State& st) { for (auto _ : st) { } }); bm->Iterations(1); } void TearDown() override { ClearRegisteredBenchmarks(); } }; // Test that Setup/Teardown can correctly take lambda expressions TEST_F(BenchmarkTest, LambdaTestCopy) { auto setup_lambda = [this](const State&) { setup_calls++; }; auto teardown_lambda = [this](const State&) { teardown_calls++; }; bm->Setup(setup_lambda); bm->Teardown(teardown_lambda); RunSpecifiedBenchmarks(&null_reporter); EXPECT_EQ(setup_calls, 1); EXPECT_EQ(teardown_calls, 1); } // Test that Setup/Teardown can correctly
take lambda expressions TEST_F(BenchmarkTest, LambdaTestMove) { auto setup_lambda = [this](const State&) { setup_calls++; }; auto teardown_lambda = [this](const State&) { teardown_calls++; }; bm->Setup(std::move(setup_lambda)); bm->Teardown(std::move(teardown_lambda)); RunSpecifiedBenchmarks(&null_reporter); EXPECT_EQ(setup_calls, 1); EXPECT_EQ(teardown_calls, 1); } // Test that Setup/Teardown can correctly take std::function TEST_F(BenchmarkTest, CallbackFunctionCopy) { callback_function setup_lambda = [this](const State&) { setup_calls++; }; callback_function teardown_lambda = [this](const State&) { teardown_calls++; }; bm->Setup(setup_lambda); bm->Teardown(teardown_lambda); RunSpecifiedBenchmarks(&null_reporter); EXPECT_EQ(setup_calls, 1); EXPECT_EQ(teardown_calls, 1); } // Test that Setup/Teardown can correctly take std::function TEST_F(BenchmarkTest, CallbackFunctionMove) { callback_function setup_lambda = [this](const State&) { setup_calls++; }; callback_function teardown_lambda = [this](const State&) { teardown_calls++; }; bm->Setup(std::move(setup_lambda)); bm->Teardown(std::move(teardown_lambda)); RunSpecifiedBenchmarks(&null_reporter); EXPECT_EQ(setup_calls, 1); EXPECT_EQ(teardown_calls, 1); } // Test that Setup/Teardown can correctly take functors TEST_F(BenchmarkTest, FunctorCopy) { Functor func; bm->Setup(func); bm->Teardown(func); RunSpecifiedBenchmarks(&null_reporter); EXPECT_EQ(functor_called, 2); } // Test that Setup/Teardown can correctly take functors TEST_F(BenchmarkTest, FunctorMove) { Functor func1; Functor func2; bm->Setup(std::move(func1)); bm->Teardown(std::move(func2)); RunSpecifiedBenchmarks(&null_reporter); EXPECT_EQ(functor_called, 2); } // Test that Setup/Teardown can not take nullptr TEST_F(BenchmarkTest, NullptrTest) { #if GTEST_HAS_DEATH_TEST // Tests only runnable in debug mode (when BM_CHECK is enabled).
#ifndef NDEBUG #ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS EXPECT_DEATH(bm->Setup(nullptr), "setup != nullptr"); EXPECT_DEATH(bm->Teardown(nullptr), "teardown != nullptr"); #else GTEST_SKIP() << "Test skipped because BM_CHECK is disabled"; #endif #endif #endif } ================================================ FILE: test/benchmark_setup_teardown_test.cc ================================================ #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" // Test that Setup() and Teardown() are called exactly once // for each benchmark run (single-threaded). namespace { namespace singlethreaded { static int setup_call = 0; static int teardown_call = 0; } // namespace singlethreaded } // namespace static void DoSetup1(const benchmark::State& state) { ++singlethreaded::setup_call; // Setup/Teardown should never be called with any thread_idx != 0. assert(state.thread_index() == 0); } static void DoTeardown1(const benchmark::State& state) { ++singlethreaded::teardown_call; assert(state.thread_index() == 0); } static void BM_with_setup(benchmark::State& state) { for (auto s : state) { } } BENCHMARK(BM_with_setup) ->Arg(1) ->Arg(3) ->Arg(5) ->Arg(7) ->Iterations(100) ->Setup(DoSetup1) ->Teardown(DoTeardown1); // Test that Setup() and Teardown() are called once for each group of threads. 
namespace { namespace concurrent { static std::atomic setup_call(0); static std::atomic teardown_call(0); static std::atomic func_call(0); } // namespace concurrent void DoSetup2(const benchmark::State& state) { concurrent::setup_call.fetch_add(1, std::memory_order_acquire); assert(state.thread_index() == 0); } void DoTeardown2(const benchmark::State& state) { concurrent::teardown_call.fetch_add(1, std::memory_order_acquire); assert(state.thread_index() == 0); } void BM_concurrent(benchmark::State& state) { for (auto s : state) { } concurrent::func_call.fetch_add(1, std::memory_order_acquire); } BENCHMARK(BM_concurrent) ->Setup(DoSetup2) ->Teardown(DoTeardown2) ->Iterations(100) ->Threads(5) ->Threads(10) ->Threads(15); // Testing interaction with Fixture::Setup/Teardown namespace fixture_interaction { int setup = 0; int fixture_setup = 0; } // namespace fixture_interaction #define FIXTURE_BECHMARK_NAME MyFixture class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State& /*unused*/) override { fixture_interaction::fixture_setup++; } ~FIXTURE_BECHMARK_NAME() override {} }; BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) { for (auto _ : st) { } } void DoSetupWithFixture(const benchmark::State& /*unused*/) { fixture_interaction::setup++; } BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, BM_WithFixture) ->Arg(1) ->Arg(3) ->Arg(5) ->Arg(7) ->Setup(DoSetupWithFixture) ->Repetitions(1) ->Iterations(100); // Testing repetitions. 
namespace repetitions { int setup = 0; } void DoSetupWithRepetitions(const benchmark::State& /*unused*/) { repetitions::setup++; } void BM_WithRep(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_WithRep) ->Arg(1) ->Arg(3) ->Arg(5) ->Arg(7) ->Setup(DoSetupWithRepetitions) ->Iterations(100) ->Repetitions(4); } // namespace int main(int argc, char** argv) { benchmark::MaybeReenterWithoutASLR(argc, argv); benchmark::Initialize(&argc, argv); size_t ret = benchmark::RunSpecifiedBenchmarks("."); assert(ret > 0); // Setup/Teardown is called once for each arg group (1,3,5,7). assert(singlethreaded::setup_call == 4); assert(singlethreaded::teardown_call == 4); // 3 groups of threads calling this function (5,10,15). assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3); assert(concurrent::teardown_call.load(std::memory_order_relaxed) == 3); assert((5 + 10 + 15) == concurrent::func_call.load(std::memory_order_relaxed)); // Setup is called 4 times, once for each arg group (1,3,5,7) assert(fixture_interaction::setup == 4); // Fixture::Setup is called every time the bm routine is run. // The exact number is non-deterministic, so we just assert that // it's more than setup. assert(fixture_interaction::fixture_setup > fixture_interaction::setup); // Setup is called once for each repetition * num_arg = 4 * 4 = 16. assert(repetitions::setup == 16); return 0; } ================================================ FILE: test/benchmark_test.cc ================================================ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #if defined(__GNUC__) #define BENCHMARK_NOINLINE __attribute__((noinline)) #else #define BENCHMARK_NOINLINE #endif namespace { int BENCHMARK_NOINLINE Factorial(int n) { return (n == 1) ?
1 : n * Factorial(n - 1); } double CalculatePi(int depth) { double pi = 0.0; for (int i = 0; i < depth; ++i) { double numerator = static_cast(((i % 2) * 2) - 1); double denominator = static_cast((2 * i) - 1); pi += numerator / denominator; } return (pi - 1.0) * 4; } std::set ConstructRandomSet(int64_t size) { std::set s; for (int i = 0; i < size; ++i) { s.insert(s.end(), i); } return s; } // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) std::mutex test_vector_mu; std::optional> test_vector; // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) void BM_Factorial(benchmark::State& state) { int fac_42 = 0; for (auto _ : state) { fac_42 = Factorial(8); } // Prevent compiler optimizations std::stringstream ss; ss << fac_42; state.SetLabel(ss.str()); } BENCHMARK(BM_Factorial); BENCHMARK(BM_Factorial)->UseRealTime(); void BM_CalculatePiRange(benchmark::State& state) { double pi = 0.0; for (auto _ : state) { pi = CalculatePi(static_cast(state.range(0))); } std::stringstream ss; ss << pi; state.SetLabel(ss.str()); } BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); void BM_CalculatePi(benchmark::State& state) { static const int depth = 1024; for (auto _ : state) { double pi = CalculatePi(static_cast(depth)); benchmark::DoNotOptimize(pi); } } BENCHMARK(BM_CalculatePi)->Threads(8); BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); void BM_SetInsert(benchmark::State& state) { std::set data; for (auto _ : state) { state.PauseTiming(); data = ConstructRandomSet(state.range(0)); state.ResumeTiming(); for (int j = 0; j < state.range(1); ++j) { data.insert(rand()); } } state.SetItemsProcessed(state.iterations() * state.range(1)); state.SetBytesProcessed(state.iterations() * state.range(1) * static_cast(sizeof(int))); } // Test many inserts at once to reduce the total iterations needed. Otherwise, // the slower, non-timed part of each iteration will make the benchmark take // forever. 
BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); template void BM_Sequential(benchmark::State& state) { ValueType v = 42; for (auto _ : state) { Container c; for (int64_t i = state.range(0); --i;) { c.push_back(v); } } const int64_t items_processed = state.iterations() * state.range(0); state.SetItemsProcessed(items_processed); state.SetBytesProcessed(items_processed * static_cast(sizeof(v))); } BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int) ->Range(1 << 0, 1 << 10); BENCHMARK_TEMPLATE(BM_Sequential, std::list)->Range(1 << 0, 1 << 10); // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. BENCHMARK_TEMPLATE(BM_Sequential, std::vector, int)->Arg(512); void BM_StringCompare(benchmark::State& state) { size_t len = static_cast(state.range(0)); std::string s1(len, '-'); std::string s2(len, '-'); for (auto _ : state) { auto comp = s1.compare(s2); benchmark::DoNotOptimize(comp); } } BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); void BM_SetupTeardown(benchmark::State& state) { if (state.thread_index() == 0) { // No need to lock test_vector_mu here as this is running single-threaded. 
test_vector = std::vector(); } int i = 0; for (auto _ : state) { std::lock_guard l(test_vector_mu); if (i % 2 == 0) { test_vector->push_back(i); } else { test_vector->pop_back(); } ++i; } if (state.thread_index() == 0) { test_vector.reset(); } } BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); void BM_LongTest(benchmark::State& state) { double tracker = 0.0; for (auto _ : state) { for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(tracker += i); } } } BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); void BM_ParallelMemset(benchmark::State& state) { int64_t size = state.range(0) / static_cast(sizeof(int)); int thread_size = static_cast(size) / state.threads(); int from = thread_size * state.thread_index(); int to = from + thread_size; if (state.thread_index() == 0) { test_vector = std::vector(static_cast(size)); } for (auto _ : state) { for (int i = from; i < to; i++) { // No need to lock test_vector_mu as ranges // do not overlap between threads. benchmark::DoNotOptimize(test_vector->at(static_cast(i)) = 1); } } if (state.thread_index() == 0) { test_vector.reset(); } } BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); void BM_ManualTiming(benchmark::State& state) { int64_t slept_for = 0; int64_t microseconds = state.range(0); std::chrono::duration sleep_duration{ static_cast(microseconds)}; for (auto _ : state) { auto start = std::chrono::high_resolution_clock::now(); // Simulate some useful workload with a sleep std::this_thread::sleep_for( std::chrono::duration_cast(sleep_duration)); auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast>(end - start); state.SetIterationTime(elapsed.count()); slept_for += microseconds; } state.SetItemsProcessed(slept_for); } BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); template void BM_with_args(benchmark::State& state, Args&&...) 
{ for (auto _ : state) { } } BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), std::pair(42, 3.8)); void BM_non_template_args(benchmark::State& state, int, double) { while (state.KeepRunning()) { } } BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); template void BM_template2_capture(benchmark::State& state, ExtraArgs&&... extra_args) { static_assert(std::is_same::value, ""); static_assert(std::is_same::value, ""); static_assert(std::is_same::value, ""); unsigned int dummy[sizeof...(ExtraArgs)] = {extra_args...}; assert(dummy[0] == 42); for (auto _ : state) { } } BENCHMARK_TEMPLATE2_CAPTURE(BM_template2_capture, void, char*, foo, 42U); BENCHMARK_CAPTURE((BM_template2_capture), foo, 42U); template void BM_template1_capture(benchmark::State& state, ExtraArgs&&... extra_args) { static_assert(std::is_same::value, ""); static_assert(std::is_same::value, ""); unsigned long dummy[sizeof...(ExtraArgs)] = {extra_args...}; assert(dummy[0] == 24); for (auto _ : state) { } } BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL); BENCHMARK_CAPTURE(BM_template1_capture, foo, 24UL); void BM_DenseThreadRanges(benchmark::State& st) { switch (st.range(0)) { case 1: assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3); break; case 2: assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4); break; case 3: assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 || st.threads() == 14); break; default: assert(false && "Invalid test case number"); } while (st.KeepRunning()) { } } BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3); BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); void BM_BenchmarkName(benchmark::State& state) { for (auto _ : state) { } // Check that the benchmark name is passed correctly to `state`. 
assert("BM_BenchmarkName" == state.name()); } BENCHMARK(BM_BenchmarkName); // regression test for #1446 template void BM_templated_test(benchmark::State& state) { for (auto _ : state) { type created_string; benchmark::DoNotOptimize(created_string); } } const auto BM_templated_test_double = BM_templated_test>; BENCHMARK(BM_templated_test_double); } // end namespace BENCHMARK_MAIN(); ================================================ FILE: test/clobber_memory_assembly_test.cc ================================================ #include "benchmark/macros.h" #include "benchmark/utils.h" #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type" #endif BENCHMARK_DISABLE_DEPRECATED_WARNING extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; } // CHECK-LABEL: test_basic: extern "C" void test_basic() { int x; benchmark::DoNotOptimize(&x); x = 101; benchmark::ClobberMemory(); // CHECK: leaq [[DEST:[^,]+]], %rax // CHECK: movl $101, [[DEST]] // CHECK: ret } // CHECK-LABEL: test_redundant_store: extern "C" void test_redundant_store() { ExternInt = 3; benchmark::ClobberMemory(); ExternInt = 51; // CHECK-DAG: ExternInt // CHECK-DAG: movl $3 // CHECK: movl $51 } // CHECK-LABEL: test_redundant_read: extern "C" void test_redundant_read() { int x; benchmark::DoNotOptimize(&x); x = ExternInt; benchmark::ClobberMemory(); x = ExternInt2; // CHECK: leaq [[DEST:[^,]+]], %rax // CHECK: ExternInt(%rip) // CHECK: movl %eax, [[DEST]] // CHECK-NOT: ExternInt2 // CHECK: ret } // CHECK-LABEL: test_redundant_read2: extern "C" void test_redundant_read2() { int x; benchmark::DoNotOptimize(&x); x = ExternInt; benchmark::ClobberMemory(); x = ExternInt2; benchmark::ClobberMemory(); // CHECK: leaq [[DEST:[^,]+]], %rax // CHECK: ExternInt(%rip) // CHECK: movl %eax, [[DEST]] // CHECK: ExternInt2(%rip) // CHECK: movl %eax, [[DEST]] // CHECK: ret } ================================================ FILE: test/commandlineflags_gtest.cc 
================================================ #include #include "../src/commandlineflags.h" #include "../src/internal_macros.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { namespace { #if defined(BENCHMARK_OS_WINDOWS) /* POSIX-style setenv() shim for Windows. Sets `name` to `value` through _putenv_s and returns its result. When `overwrite` is 0 and `name` is already present in the environment, the existing value is kept and 0 is returned, matching POSIX semantics (previously this case overwrote the value, and the unset case failed with -1). */ int setenv(const char* name, const char* value, int overwrite) { if (!overwrite) { // NOTE: getenv_s is far superior but not available under mingw. char* env_value = getenv(name); if (env_value != nullptr) { return 0; } } return _putenv_s(name, value); } /* POSIX-style unsetenv() shim for Windows: assigns an empty value through _putenv_s, which the CRT documents as removing the variable. */ int unsetenv(const char* name) { return _putenv_s(name, ""); } #endif // BENCHMARK_OS_WINDOWS TEST(BoolFromEnv, Default) { ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); EXPECT_EQ(BoolFromEnv("not_in_env", true), true); } TEST(BoolFromEnv, False) { ASSERT_EQ(setenv("IN_ENV", "0", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "N", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "n", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "NO", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "No", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "no", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "F", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "f", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "FALSE", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "False", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "false", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "OFF", 1), 0);
EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "Off", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "off", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", true), false); unsetenv("IN_ENV"); } TEST(BoolFromEnv, True) { ASSERT_EQ(setenv("IN_ENV", "1", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "Y", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "y", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "YES", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "Yes", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "yes", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "T", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "t", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "TRUE", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "True", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "true", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "ON", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "On", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); ASSERT_EQ(setenv("IN_ENV", "on", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); #ifndef BENCHMARK_OS_WINDOWS ASSERT_EQ(setenv("IN_ENV", "", 1), 0); EXPECT_EQ(BoolFromEnv("in_env", false), true); unsetenv("IN_ENV"); #endif } TEST(Int32FromEnv, 
NotInEnv) { ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); EXPECT_EQ(Int32FromEnv("not_in_env", 42), 42); } TEST(Int32FromEnv, InvalidInteger) { ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); EXPECT_EQ(Int32FromEnv("in_env", 42), 42); unsetenv("IN_ENV"); } TEST(Int32FromEnv, ValidInteger) { ASSERT_EQ(setenv("IN_ENV", "42", 1), 0); EXPECT_EQ(Int32FromEnv("in_env", 64), 42); unsetenv("IN_ENV"); } TEST(DoubleFromEnv, NotInEnv) { ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); EXPECT_EQ(DoubleFromEnv("not_in_env", 0.51), 0.51); } TEST(DoubleFromEnv, InvalidReal) { ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); EXPECT_EQ(DoubleFromEnv("in_env", 0.51), 0.51); unsetenv("IN_ENV"); } TEST(DoubleFromEnv, ValidReal) { ASSERT_EQ(setenv("IN_ENV", "0.51", 1), 0); EXPECT_EQ(DoubleFromEnv("in_env", 0.71), 0.51); unsetenv("IN_ENV"); } TEST(StringFromEnv, Default) { ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); EXPECT_STREQ(StringFromEnv("not_in_env", "foo"), "foo"); } TEST(StringFromEnv, Valid) { ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); EXPECT_STREQ(StringFromEnv("in_env", "bar"), "foo"); unsetenv("IN_ENV"); } TEST(KvPairsFromEnv, Default) { ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); EXPECT_THAT(KvPairsFromEnv("not_in_env", {{"foo", "bar"}}), testing::ElementsAre(testing::Pair("foo", "bar"))); } TEST(KvPairsFromEnv, MalformedReturnsDefault) { ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); EXPECT_THAT(KvPairsFromEnv("in_env", {{"foo", "bar"}}), testing::ElementsAre(testing::Pair("foo", "bar"))); unsetenv("IN_ENV"); } TEST(KvPairsFromEnv, Single) { ASSERT_EQ(setenv("IN_ENV", "foo=bar", 1), 0); EXPECT_THAT(KvPairsFromEnv("in_env", {}), testing::ElementsAre(testing::Pair("foo", "bar"))); unsetenv("IN_ENV"); } TEST(KvPairsFromEnv, Multiple) { ASSERT_EQ(setenv("IN_ENV", "foo=bar,baz=qux", 1), 0); EXPECT_THAT(KvPairsFromEnv("in_env", {}), testing::UnorderedElementsAre(testing::Pair("foo", "bar"), testing::Pair("baz", "qux"))); unsetenv("IN_ENV"); } } // namespace } // namespace benchmark 
================================================ FILE: test/complexity_test.cc ================================================ #undef NDEBUG #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/statistics.h" #include "benchmark/types.h" #include "benchmark/utils.h" #include "output_test.h" namespace { #define ADD_COMPLEXITY_CASES(...) \ const int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) int AddComplexityTest(const std::string& test_name, const std::string& big_o_test_name, const std::string& rms_test_name, const std::string& big_o, int family_index) { SetSubstitutions({{"%name", test_name}, {"%bigo_name", big_o_test_name}, {"%rms_name", rms_test_name}, {"%bigo_str", "[ ]* %float " + big_o}, {"%bigo", big_o}, {"%rms", "[ ]*[0-9]+ %"}}); AddCases( TC_ConsoleOut, {{"^%bigo_name %bigo_str %bigo_str[ ]*$"}, {"^%bigo_name", MR_Not}, // Assert we didn't only match a name.
{"^%rms_name %rms %rms[ ]*$", MR_Next}}); AddCases( TC_JSONOut, {{"\"name\": \"%bigo_name\",$"}, {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"%name\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": %int,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"BigO\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"cpu_coefficient\": %float,$", MR_Next}, {"\"real_coefficient\": %float,$", MR_Next}, {"\"big_o\": \"%bigo\",$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}, {"\"name\": \"%rms_name\",$"}, {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"%name\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": %int,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"RMS\",$", MR_Next}, {"\"aggregate_unit\": \"percentage\",$", MR_Next}, {"\"rms\": %float$", MR_Next}, {"}", MR_Next}}); AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, {"^\"%bigo_name\"", MR_Not}, {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}}); return 0; } // ========================================================================= // // --------------------------- Testing BigO O(1) --------------------------- // // ========================================================================= // void BM_Complexity_O1(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero benchmark::DoNotOptimize(state.iterations()); double tmp = static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { benchmark::DoNotOptimize(state.iterations()); tmp *= static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); } // always 42ns per iteration state.SetIterationTime(42 * 
1e-9); } state.SetComplexityN(state.range(0)); } BENCHMARK(BM_Complexity_O1) ->Range(1, 1 << 18) ->UseManualTime() ->Complexity(benchmark::o1); BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->UseManualTime()->Complexity(); BENCHMARK(BM_Complexity_O1) ->Range(1, 1 << 18) ->UseManualTime() ->Complexity([](benchmark::IterationCount) { return 1.0; }); constexpr char one_test_name[] = "BM_Complexity_O1/manual_time"; constexpr char big_o_1_test_name[] = "BM_Complexity_O1/manual_time_BigO"; constexpr char rms_o_1_test_name[] = "BM_Complexity_O1/manual_time_RMS"; constexpr char enum_auto_big_o_1[] = "\\([0-9]+\\)"; constexpr char lambda_big_o_1[] = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1, /*family_index=*/0); // Add auto tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1, /*family_index=*/1); // Add lambda tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, lambda_big_o_1, /*family_index=*/2); // ========================================================================= // // --------------------------- Testing BigO O(N) --------------------------- // // ========================================================================= // void BM_Complexity_O_N(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero benchmark::DoNotOptimize(state.iterations()); double tmp = static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { benchmark::DoNotOptimize(state.iterations()); tmp *= static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); } // 42ns per iteration per entry state.SetIterationTime(static_cast(state.range(0)) * 42 * 1e-9); } state.SetComplexityN(state.range(0)); } BENCHMARK(BM_Complexity_O_N) ->RangeMultiplier(2) ->Range(1 << 10, 1 << 20) ->UseManualTime() 
->Complexity(benchmark::oN); BENCHMARK(BM_Complexity_O_N) ->RangeMultiplier(2) ->Range(1 << 10, 1 << 20) ->UseManualTime() ->Complexity(); BENCHMARK(BM_Complexity_O_N) ->RangeMultiplier(2) ->Range(1 << 10, 1 << 20) ->UseManualTime() ->Complexity([](benchmark::IterationCount n) -> double { return static_cast(n); }); constexpr char n_test_name[] = "BM_Complexity_O_N/manual_time"; constexpr char big_o_n_test_name[] = "BM_Complexity_O_N/manual_time_BigO"; constexpr char rms_o_n_test_name[] = "BM_Complexity_O_N/manual_time_RMS"; constexpr char enum_auto_big_o_n[] = "N"; constexpr char lambda_big_o_n[] = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n, /*family_index=*/3); // Add auto tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n, /*family_index=*/4); // Add lambda tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n, /*family_index=*/5); // ========================================================================= // // ------------------------- Testing BigO O(NlgN) ------------------------- // // ========================================================================= // const double kLog2E = 1.44269504088896340736; void BM_Complexity_O_N_log_N(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero benchmark::DoNotOptimize(state.iterations()); double tmp = static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { benchmark::DoNotOptimize(state.iterations()); tmp *= static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); } state.SetIterationTime(static_cast(state.range(0)) * kLog2E * std::log(state.range(0)) * 42 * 1e-9); } state.SetComplexityN(state.range(0)); } BENCHMARK(BM_Complexity_O_N_log_N) ->RangeMultiplier(2) ->Range(1 << 10, 1U << 24) ->UseManualTime() 
->Complexity(benchmark::oNLogN); BENCHMARK(BM_Complexity_O_N_log_N) ->RangeMultiplier(2) ->Range(1 << 10, 1U << 24) ->UseManualTime() ->Complexity(); BENCHMARK(BM_Complexity_O_N_log_N) ->RangeMultiplier(2) ->Range(1 << 10, 1U << 24) ->UseManualTime() ->Complexity([](benchmark::IterationCount n) { return kLog2E * static_cast(n) * std::log(static_cast(n)); }); constexpr char n_lg_n_test_name[] = "BM_Complexity_O_N_log_N/manual_time"; constexpr char big_o_n_lg_n_test_name[] = "BM_Complexity_O_N_log_N/manual_time_BigO"; constexpr char rms_o_n_lg_n_test_name[] = "BM_Complexity_O_N_log_N/manual_time_RMS"; constexpr char enum_auto_big_o_n_lg_n[] = "NlgN"; constexpr char lambda_big_o_n_lg_n[] = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n, /*family_index=*/6); // NOTE: auto big-o is wrong. ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n, /*family_index=*/7); // Add lambda tests ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n, /*family_index=*/8); // ========================================================================= // // -------- Testing formatting of Complexity with captured args ------------ // // ========================================================================= // void BM_ComplexityCaptureArgs(benchmark::State& state, int n) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero benchmark::DoNotOptimize(state.iterations()); double tmp = static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { benchmark::DoNotOptimize(state.iterations()); tmp *= static_cast(state.iterations()); benchmark::DoNotOptimize(tmp); } state.SetIterationTime(static_cast(state.range(0)) * 42 * 1e-9); } state.SetComplexityN(n); } 
BENCHMARK_CAPTURE(BM_ComplexityCaptureArgs, capture_test, 100) ->UseManualTime() ->Complexity(benchmark::oN) ->Ranges({{1, 2}, {3, 4}}); const std::string complexity_capture_name = "BM_ComplexityCaptureArgs/capture_test/manual_time"; ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO", complexity_capture_name + "_RMS", "N", /*family_index=*/9); } // end namespace // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/cxx11_test.cc ================================================ #include "benchmark/benchmark_api.h" #if defined(_MSC_VER) #if _MSVC_LANG != 201402L // MSVC, even in C++11 mode, does not claim to be in C++11 mode. #error "Trying to compile C++11 test with wrong C++ standard" #endif // _MSVC_LANG #else // Non-MSVC #if __cplusplus != 201103L #error "Trying to compile C++11 test with wrong C++ standard" #endif // Non-MSVC #endif ================================================ FILE: test/diagnostics_test.cc ================================================ // Testing: // State::PauseTiming() // State::ResumeTiming() // Test that CHECKs within these functions diagnose when they are called // outside of the KeepRunning() loop. // // NOTE: Users should NOT include or use src/check.h. This is only done in // order to test library internals. 
#include #include #include "../src/check.h" #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #if defined(__GNUC__) && !defined(__EXCEPTIONS) #define TEST_HAS_NO_EXCEPTIONS #endif namespace { void TestHandler() { #ifndef TEST_HAS_NO_EXCEPTIONS throw std::logic_error(""); #else std::abort(); #endif } void try_invalid_pause_resume(benchmark::State& state) { #if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && \ !defined(TEST_HAS_NO_EXCEPTIONS) try { state.PauseTiming(); std::abort(); } catch (std::logic_error const&) { } try { state.ResumeTiming(); std::abort(); } catch (std::logic_error const&) { } #else (void)state; // avoid unused warning #endif } void BM_diagnostic_test(benchmark::State& state) { static bool called_once = false; if (!called_once) { try_invalid_pause_resume(state); } for (auto _ : state) { auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } if (!called_once) { try_invalid_pause_resume(state); } called_once = true; } BENCHMARK(BM_diagnostic_test); void BM_diagnostic_test_keep_running(benchmark::State& state) { static bool called_once = false; if (!called_once) { try_invalid_pause_resume(state); } while (state.KeepRunning()) { auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } if (!called_once) { try_invalid_pause_resume(state); } called_once = true; } BENCHMARK(BM_diagnostic_test_keep_running); } // end namespace int main(int argc, char* argv[]) { #ifdef NDEBUG // This test is exercising functionality for debug builds, which are not // available in release builds. Skip the test if we are in that environment // to avoid a test failure. 
std::cout << "Diagnostic test disabled in release build\n"; (void)argc; (void)argv; #else benchmark::MaybeReenterWithoutASLR(argc, argv); benchmark::internal::GetAbortHandler() = &TestHandler; benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); #endif } ================================================ FILE: test/display_aggregates_only_test.cc ================================================ #undef NDEBUG #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "output_test.h" // Ok this test is super ugly. We want to check what happens with the file // reporter in the presence of DisplayAggregatesOnly(). // We do not care about console output, the normal tests check that already. namespace { void BM_SummaryRepeat(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); } // end namespace int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); const std::string output = GetFileReporterOutput(argc, argv); // The unterminated prefix matches the 3 per-repetition runs plus the mean, // median, stddev and cv aggregates: 7 entries in total. if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 7 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { std::cout << "Precondition mismatch. 
Expected to only find 7 " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; } return 0; } ================================================ FILE: test/donotoptimize_assembly_test.cc ================================================ #include "benchmark/macros.h" #include "benchmark/utils.h" #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type" #pragma clang diagnostic ignored "-Wmissing-prototypes" #endif BENCHMARK_DISABLE_DEPRECATED_WARNING extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; extern int BigArray[2049]; const int ConstBigArray[2049]{}; inline int Add42(int x) { return x + 42; } struct NotTriviallyCopyable { NotTriviallyCopyable(); explicit NotTriviallyCopyable(int x) : value(x) {} NotTriviallyCopyable(NotTriviallyCopyable const&); int value; }; struct Large { int value; int data[2]; }; struct ExtraLarge { int arr[2049]; }; } extern ExtraLarge ExtraLargeObj; const ExtraLarge ConstExtraLargeObj{}; // CHECK-LABEL: test_with_rvalue: extern "C" void test_with_rvalue() { benchmark::DoNotOptimize(Add42(0)); // CHECK: movl $42, %eax // CHECK: ret } // CHECK-LABEL: test_with_large_rvalue: extern "C" void test_with_large_rvalue() { benchmark::DoNotOptimize(Large{ExternInt, {ExternInt, ExternInt}}); // CHECK: ExternInt(%rip) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]] // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) // CHECK: ret } // CHECK-LABEL: test_with_non_trivial_rvalue: extern "C" void test_with_non_trivial_rvalue() { benchmark::DoNotOptimize(NotTriviallyCopyable(ExternInt)); // 
CHECK: mov{{l|q}} ExternInt(%rip) // CHECK: ret } // CHECK-LABEL: test_with_lvalue: extern "C" void test_with_lvalue() { int x = 101; benchmark::DoNotOptimize(x); // CHECK-GNU: movl $101, %eax // CHECK-CLANG: movl $101, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret } // CHECK-LABEL: test_with_large_lvalue: extern "C" void test_with_large_lvalue() { Large L{ExternInt, {ExternInt, ExternInt}}; benchmark::DoNotOptimize(L); // CHECK: ExternInt(%rip) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) // CHECK: ret } // CHECK-LABEL: test_with_extra_large_lvalue_with_op: extern "C" void test_with_extra_large_lvalue_with_op() { ExtraLargeObj.arr[16] = 42; benchmark::DoNotOptimize(ExtraLargeObj); // CHECK: movl $42, ExtraLargeObj+64(%rip) // CHECK: ret } // CHECK-LABEL: test_with_big_array_with_op extern "C" void test_with_big_array_with_op() { BigArray[16] = 42; benchmark::DoNotOptimize(BigArray); // CHECK: movl $42, BigArray+64(%rip) // CHECK: ret } // CHECK-LABEL: test_with_non_trivial_lvalue: extern "C" void test_with_non_trivial_lvalue() { NotTriviallyCopyable NTC(ExternInt); benchmark::DoNotOptimize(NTC); // CHECK: ExternInt(%rip) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret } // CHECK-LABEL: test_with_const_lvalue: extern "C" void test_with_const_lvalue() { const int x = 123; benchmark::DoNotOptimize(x); // CHECK: movl $123, %eax // CHECK: ret } // CHECK-LABEL: test_with_large_const_lvalue: extern "C" void test_with_large_const_lvalue() { const Large L{ExternInt, {ExternInt, ExternInt}}; benchmark::DoNotOptimize(L); // CHECK: ExternInt(%rip) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) // CHECK: ret } // CHECK-LABEL: test_with_const_extra_large_obj: extern "C" void test_with_const_extra_large_obj() { benchmark::DoNotOptimize(ConstExtraLargeObj); // CHECK: ret } // CHECK-LABEL: 
test_with_const_big_array extern "C" void test_with_const_big_array() { benchmark::DoNotOptimize(ConstBigArray); // CHECK: ret } // CHECK-LABEL: test_with_non_trivial_const_lvalue: extern "C" void test_with_non_trivial_const_lvalue() { const NotTriviallyCopyable Obj(ExternInt); benchmark::DoNotOptimize(Obj); // CHECK: mov{{q|l}} ExternInt(%rip) // CHECK: ret } // CHECK-LABEL: test_div_by_two: extern "C" int test_div_by_two(int input) { int divisor = 2; benchmark::DoNotOptimize(divisor); return input / divisor; // CHECK: movl $2, [[DEST:.*]] // CHECK: idivl [[DEST]] // CHECK: ret } // CHECK-LABEL: test_inc_integer: extern "C" int test_inc_integer() { int x = 0; for (int i = 0; i < 5; ++i) benchmark::DoNotOptimize(++x); // CHECK: movl $1, [[DEST:.*]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK-CLANG: movl [[DEST]], %eax // CHECK: ret return x; } // CHECK-LABEL: test_pointer_rvalue extern "C" void test_pointer_rvalue() { // CHECK: movl $42, [[DEST:.*]] // CHECK: leaq [[DEST]], %rax // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret int x = 42; benchmark::DoNotOptimize(&x); } // CHECK-LABEL: test_pointer_const_lvalue: extern "C" void test_pointer_const_lvalue() { // CHECK: movl $42, [[DEST:.*]] // CHECK: leaq [[DEST]], %rax // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret int x = 42; int* const xp = &x; benchmark::DoNotOptimize(xp); } // CHECK-LABEL: test_pointer_lvalue: extern "C" void test_pointer_lvalue() { // CHECK: movl $42, [[DEST:.*]] // CHECK: leaq [[DEST]], %rax // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z+]+]]) // CHECK: ret int x = 42; int* xp = &x; benchmark::DoNotOptimize(xp); } ================================================ FILE: test/donotoptimize_test.cc ================================================ #include #include "benchmark/benchmark_api.h" #include "benchmark/utils.h" 
namespace { #if defined(__GNUC__) std::int64_t double_up(std::int64_t x) __attribute__((const)); #endif std::int64_t double_up(const std::int64_t x) { return x * 2; } } // namespace // Using DoNotOptimize on types like BitRef seem to cause a lot of problems // with the inline assembly on both GCC and Clang. struct BitRef { int index; unsigned char& byte; public: static BitRef Make() { static unsigned char arr[2] = {}; BitRef b(1, arr[0]); return b; } private: BitRef(int i, unsigned char& b) : index(i), byte(b) {} }; int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); // this test verifies compilation of DoNotOptimize() for some types char buffer1[1] = ""; benchmark::DoNotOptimize(buffer1); char buffer2[2] = ""; benchmark::DoNotOptimize(buffer2); char buffer3[3] = ""; benchmark::DoNotOptimize(buffer3); char buffer8[8] = ""; benchmark::DoNotOptimize(buffer8); char buffer20[20] = ""; benchmark::DoNotOptimize(buffer20); char buffer1024[1024] = ""; benchmark::DoNotOptimize(buffer1024); char* bptr = &buffer1024[0]; benchmark::DoNotOptimize(bptr); int x = 123; benchmark::DoNotOptimize(x); int* xp = &x; benchmark::DoNotOptimize(xp); benchmark::DoNotOptimize(x += 42); std::int64_t y = double_up(x); benchmark::DoNotOptimize(y); // Check that DoNotOptimize() accepts a BitRef lvalue. BitRef lval = BitRef::Make(); benchmark::DoNotOptimize(lval); // Check that it also accepts a BitRef rvalue. 
benchmark::DoNotOptimize(BitRef::Make()); } ================================================ FILE: test/filter_test.cc ================================================ #include #include #include #include #include #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" namespace { class TestReporter : public benchmark::ConsoleReporter { public: bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; void ReportRuns(const std::vector& report) override { ++count_; max_family_index_ = std::max(max_family_index_, report[0].family_index); ConsoleReporter::ReportRuns(report); }; TestReporter() : count_(0), max_family_index_(0) {} ~TestReporter() override {} int GetCount() const { return count_; } int64_t GetMaxFamilyIndex() const { return max_family_index_; } private: mutable int count_; mutable int64_t max_family_index_; }; void NoPrefix(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(NoPrefix); void BM_Foo(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_Foo); void BM_Bar(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_Bar); void BM_FooBar(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_FooBar); void BM_FooBa(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_FooBa); } // end namespace int main(int argc, char** argv) { benchmark::MaybeReenterWithoutASLR(argc, argv); bool list_only = false; for (int i = 0; i < argc; ++i) { list_only |= std::string(argv[i]).find("--benchmark_list_tests") != std::string::npos; } benchmark::Initialize(&argc, argv); TestReporter test_reporter; const int64_t returned_count = static_cast(benchmark::RunSpecifiedBenchmarks(&test_reporter)); if (argc == 2) { // Make sure we ran all of the tests std::stringstream ss(argv[1]); int64_t expected_return = 0; ss >> expected_return; if (returned_count != 
expected_return) { std::cerr << "ERROR: Expected " << expected_return << " tests to match the filter but returned_count = " << returned_count << '\n'; return -1; } const int64_t expected_reports = list_only ? 0 : expected_return; const int64_t reports_count = test_reporter.GetCount(); if (reports_count != expected_reports) { std::cerr << "ERROR: Expected " << expected_reports << " tests to be run but reported_count = " << reports_count << '\n'; return -1; } const int64_t max_family_index = test_reporter.GetMaxFamilyIndex(); const int64_t num_families = reports_count == 0 ? 0 : 1 + max_family_index; if (num_families != expected_reports) { std::cerr << "ERROR: Expected " << expected_reports << " test families to be run but num_families = " << num_families << '\n'; return -1; } } return 0; } ================================================ FILE: test/fixture_test.cc ================================================ #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #define FIXTURE_BECHMARK_NAME MyFixture class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State& state) override { if (state.thread_index() == 0) { assert(data.get() == nullptr); data.reset(new int(42)); } } void TearDown(const ::benchmark::State& state) override { if (state.thread_index() == 0) { assert(data.get() != nullptr); data.reset(); } } ~FIXTURE_BECHMARK_NAME() override { assert(data == nullptr); } std::unique_ptr data; }; BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State& st) { assert(data.get() != nullptr); assert(*data == 42); for (auto _ : st) { } } BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) { if (st.thread_index() == 0) { assert(data.get() != nullptr); assert(*data == 42); } for (auto _ : st) { assert(data.get() != nullptr); assert(*data == 42); } st.SetItemsProcessed(st.range(0)); } BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42); 
BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42)->ThreadPerCpu(); BENCHMARK_MAIN(); ================================================ FILE: test/internal_threading_test.cc ================================================ #undef NDEBUG #include #include #include "../src/timers.h" #include "benchmark/benchmark_api.h" #include "benchmark/counter.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "output_test.h" namespace { const std::chrono::duration time_frame(50); const double time_frame_in_sec( std::chrono::duration_cast>>( time_frame) .count()); void MyBusySpinwait() { const auto start = benchmark::ChronoClockNow(); while (true) { const auto now = benchmark::ChronoClockNow(); const auto elapsed = now - start; if (std::chrono::duration(elapsed) >= time_frame) { return; } } } // ========================================================================= // // --------------------------- TEST CASES BEGIN ---------------------------- // // ========================================================================= // // ========================================================================= // // BM_MainThread: every iteration busy-waits for time_frame on the benchmark // thread itself and reports time_frame_in_sec as the manual iteration time. void BM_MainThread(benchmark::State& state) { for (auto _ : state) { MyBusySpinwait(); state.SetIterationTime(time_frame_in_sec); } state.counters["invtime"] = benchmark::Counter{1, benchmark::Counter::kIsRate}; } BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1); BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseRealTime(); BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseManualTime(); BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime(); BENCHMARK(BM_MainThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_MainThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime() ->UseManualTime(); BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2); BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseRealTime(); 
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseManualTime(); BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime(); BENCHMARK(BM_MainThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_MainThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime() ->UseManualTime(); // ========================================================================= // // BM_WorkerThread: every iteration runs the busy-wait on a freshly started // std::thread and joins it before reporting the manual iteration time. void BM_WorkerThread(benchmark::State& state) { for (auto _ : state) { std::thread Worker(&MyBusySpinwait); Worker.join(); state.SetIterationTime(time_frame_in_sec); } state.counters["invtime"] = benchmark::Counter{1, benchmark::Counter::kIsRate}; } BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseRealTime(); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseManualTime(); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime(); BENCHMARK(BM_WorkerThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_WorkerThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime() ->UseManualTime(); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseRealTime(); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseManualTime(); BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime(); BENCHMARK(BM_WorkerThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_WorkerThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime() ->UseManualTime(); // ========================================================================= // // BM_MainThreadAndWorkerThread: every iteration busy-waits on a started // std::thread and on the benchmark thread, then joins the started thread. void BM_MainThreadAndWorkerThread(benchmark::State& state) { for (auto _ : state) { std::thread Worker(&MyBusySpinwait); MyBusySpinwait(); Worker.join(); state.SetIterationTime(time_frame_in_sec); } state.counters["invtime"] = 
benchmark::Counter{1, benchmark::Counter::kIsRate}; } BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(1); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(1) ->UseRealTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(1) ->UseManualTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(1) ->MeasureProcessCPUTime() ->UseManualTime(); BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(2); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(2) ->UseRealTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(2) ->UseManualTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_MainThreadAndWorkerThread) ->Iterations(1) ->Threads(2) ->MeasureProcessCPUTime() ->UseManualTime(); } // end namespace // ========================================================================= // // ---------------------------- TEST CASES END ----------------------------- // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/link_main_test.cc ================================================ #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" namespace { void BM_empty(benchmark::State& state) { for (auto _ : state) { auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); 
} // end namespace ================================================ FILE: test/locale_impermeability_test.cc ================================================ #undef NDEBUG #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "output_test.h" namespace { void BM_ostream(benchmark::State& state) { #if !defined(__MINGW64__) || defined(__clang__) // GCC-based versions of MINGW64 do not support locale manipulations, // don't run the test under them. std::locale::global(std::locale("en_US.UTF-8")); #endif while (state.KeepRunning()) { state.SetIterationTime(1e-6); } } BENCHMARK(BM_ostream)->UseManualTime()->Iterations(1000000); ADD_CASES(TC_ConsoleOut, {{"^BM_ostream/iterations:1000000/manual_time" " %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ostream/iterations:1000000/manual_time\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_ostream/iterations:1000000/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 1000000,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ostream/iterations:1000000/" "manual_time\",1000000,%float,%float,ns,,,,,$"}}); } // end namespace int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/manual_threading_test.cc ================================================ #include #undef NDEBUG #include #include #include "../src/timers.h" #include "benchmark/benchmark_api.h" #include "benchmark/counter.h" #include "benchmark/registration.h" #include "benchmark/state.h" namespace { const std::chrono::duration 
time_frame(50); const double time_frame_in_sec( std::chrono::duration_cast>>( time_frame) .count()); void MyBusySpinwait() { const auto start = benchmark::ChronoClockNow(); while (true) { const auto now = benchmark::ChronoClockNow(); const auto elapsed = now - start; if (std::chrono::duration(elapsed) >= time_frame) { return; } } } /* Incremented once per completed ManualThreadRunner::RunThreads() call. */ int numRunThreadsCalled_ = 0; /* Thread runner that calls fn(0) on the calling thread and fn(1) .. fn(num_threads - 1) on std::threads it starts, joining all of them before RunThreads() returns. */ class ManualThreadRunner : public benchmark::ThreadRunnerBase { public: explicit ManualThreadRunner(int num_threads) : pool(static_cast(num_threads - 1)) {} void RunThreads(const std::function& fn) final { for (std::size_t ti = 0; ti < pool.size(); ++ti) { pool[ti] = std::thread(fn, static_cast(ti + 1)); } fn(0); for (std::thread& thread : pool) { thread.join(); } ++numRunThreadsCalled_; } private: std::vector pool; }; // ========================================================================= // // --------------------------- TEST CASES BEGIN ---------------------------- // // ========================================================================= // // ========================================================================= // // BM_ManualThreading // Creation of threads is done before the start of the measurement, // joining after the finish of the measurement. 
void BM_ManualThreading(benchmark::State& state) { for (auto _ : state) { MyBusySpinwait(); state.SetIterationTime(time_frame_in_sec); } state.counters["invtime"] = benchmark::Counter{1, benchmark::Counter::kIsRate}; } } // end namespace BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1) ->UseRealTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1) ->UseManualTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1) ->MeasureProcessCPUTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1) ->MeasureProcessCPUTime() ->UseRealTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(1) ->MeasureProcessCPUTime() ->UseManualTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(2); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(2) ->UseRealTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(2) ->UseManualTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(2) ->MeasureProcessCPUTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(2) ->MeasureProcessCPUTime() 
->UseRealTime(); BENCHMARK(BM_ManualThreading) ->Iterations(1) ->ThreadRunner([](int num_threads) { return std::make_unique(num_threads); }) ->Threads(2) ->MeasureProcessCPUTime() ->UseManualTime(); // ========================================================================= // // ---------------------------- TEST CASES END ----------------------------- // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); assert(numRunThreadsCalled_ > 0); } ================================================ FILE: test/map_test.cc ================================================ #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" namespace { std::map ConstructRandomMap(int size) { std::map m; for (int i = 0; i < size; ++i) { m.insert(std::make_pair(std::rand() % size, std::rand() % size)); } return m; } // Basic version. void BM_MapLookup(benchmark::State& state) { const int size = static_cast(state.range(0)); std::map m; for (auto _ : state) { state.PauseTiming(); m = ConstructRandomMap(size); state.ResumeTiming(); for (int i = 0; i < size; ++i) { auto it = m.find(std::rand() % size); benchmark::DoNotOptimize(it); } } state.SetItemsProcessed(state.iterations() * size); } BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12); } // namespace // Using fixtures. 
class MapFixture : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State& st) override { m = ConstructRandomMap(static_cast(st.range(0))); } void TearDown(const ::benchmark::State& /*unused*/) override { m.clear(); } std::map m; }; BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) { const int size = static_cast(state.range(0)); for (auto _ : state) { for (int i = 0; i < size; ++i) { auto it = m.find(std::rand() % size); benchmark::DoNotOptimize(it); } } state.SetItemsProcessed(state.iterations() * size); } BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1 << 3, 1 << 12); BENCHMARK_MAIN(); ================================================ FILE: test/memory_manager_test.cc ================================================ #include #include "benchmark/benchmark_api.h" #include "benchmark/managers.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #include "output_test.h" namespace { class TestMemoryManager : public benchmark::MemoryManager { void Start() override {} void Stop(Result& result) override { result.num_allocs = 42; result.max_bytes_used = 42000; } }; void BM_empty(benchmark::State& state) { for (auto _ : state) { auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); } // end namespace ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_empty\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"allocs_per_iter\": %float,$", MR_Next}, {"\"max_bytes_used\": 42000$", 
MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}}); int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); std::unique_ptr mm(new TestMemoryManager()); benchmark::RegisterMemoryManager(mm.get()); RunOutputTests(argc, argv); benchmark::RegisterMemoryManager(nullptr); } ================================================ FILE: test/memory_results_gtest.cc ================================================ #include #include "benchmark/benchmark_api.h" #include "benchmark/managers.h" #include "benchmark/reporter.h" #include "benchmark/state.h" #include "gtest/gtest.h" namespace { using benchmark::Benchmark; using benchmark::ClearRegisteredBenchmarks; using benchmark::ConsoleReporter; using benchmark::MemoryManager; using benchmark::RegisterBenchmark; using benchmark::RunSpecifiedBenchmarks; using benchmark::State; constexpr int N_REPETITIONS = 100; constexpr int N_ITERATIONS = 1; int num_allocs = 0; int max_bytes_used = 0; int total_allocated_bytes = 0; int net_heap_growth = 0; void reset() { num_allocs = 0; max_bytes_used = 0; total_allocated_bytes = 0; net_heap_growth = 0; } class TestMemoryManager : public MemoryManager { void Start() override {} void Stop(Result& result) override { result.num_allocs = num_allocs; result.net_heap_growth = net_heap_growth; result.max_bytes_used = max_bytes_used; result.total_allocated_bytes = total_allocated_bytes; num_allocs += 1; max_bytes_used += 2; net_heap_growth += 4; total_allocated_bytes += 10; } }; class TestReporter : public ConsoleReporter { public: TestReporter() = default; virtual ~TestReporter() = default; bool ReportContext(const Context& /*unused*/) override { return true; } void PrintHeader(const Run&) override {} void PrintRunData(const Run& run) override { if (run.repetition_index == -1) return; if (!run.memory_result.memory_iterations) return; store.push_back(run.memory_result); } std::vector store; }; class MemoryResultsTest : public testing::Test { public: 
Benchmark* bm; TestReporter reporter; void SetUp() override { bm = RegisterBenchmark("BM", [](State& st) { for (auto _ : st) { } }); bm->Repetitions(N_REPETITIONS); bm->Iterations(N_ITERATIONS); reset(); } void TearDown() override { ClearRegisteredBenchmarks(); } }; TEST_F(MemoryResultsTest, NoMMTest) { RunSpecifiedBenchmarks(&reporter); EXPECT_EQ(reporter.store.size(), 0); } TEST_F(MemoryResultsTest, ResultsTest) { auto mm = std::make_unique(); RegisterMemoryManager(mm.get()); RunSpecifiedBenchmarks(&reporter); EXPECT_EQ(reporter.store.size(), N_REPETITIONS); for (size_t i = 0; i < reporter.store.size(); i++) { EXPECT_EQ(reporter.store[i].num_allocs, static_cast(i)); EXPECT_EQ(reporter.store[i].max_bytes_used, static_cast(i) * 2); EXPECT_EQ(reporter.store[i].net_heap_growth, static_cast(i) * 4); EXPECT_EQ(reporter.store[i].total_allocated_bytes, static_cast(i) * 10); } } } // namespace ================================================ FILE: test/min_time_parse_gtest.cc ================================================ #include "../src/benchmark_runner.h" #include "gtest/gtest.h" namespace { TEST(ParseMinTimeTest, InvalidInput) { #if GTEST_HAS_DEATH_TEST // Tests only runnable in debug mode (when BM_CHECK is enabled). 
#ifndef NDEBUG #ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS ASSERT_DEATH_IF_SUPPORTED( { benchmark::internal::ParseBenchMinTime("abc"); }, "Malformed seconds value passed to --benchmark_min_time: `abc`"); ASSERT_DEATH_IF_SUPPORTED( { benchmark::internal::ParseBenchMinTime("123ms"); }, "Malformed seconds value passed to --benchmark_min_time: `123ms`"); ASSERT_DEATH_IF_SUPPORTED( { benchmark::internal::ParseBenchMinTime("1z"); }, "Malformed seconds value passed to --benchmark_min_time: `1z`"); ASSERT_DEATH_IF_SUPPORTED( { benchmark::internal::ParseBenchMinTime("1hs"); }, "Malformed seconds value passed to --benchmark_min_time: `1hs`"); #endif #endif #endif } } // namespace ================================================ FILE: test/multiple_ranges_test.cc ================================================ #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" namespace { class MultipleRangesFixture : public ::benchmark::Fixture { public: MultipleRangesFixture() : expectedValues({{1, 3, 5}, {1, 3, 8}, {1, 3, 15}, {2, 3, 5}, {2, 3, 8}, {2, 3, 15}, {1, 4, 5}, {1, 4, 8}, {1, 4, 15}, {2, 4, 5}, {2, 4, 8}, {2, 4, 15}, {1, 7, 5}, {1, 7, 8}, {1, 7, 15}, {2, 7, 5}, {2, 7, 8}, {2, 7, 15}, {7, 6, 3}}) {} void SetUp(const ::benchmark::State& state) override { std::vector ranges = {state.range(0), state.range(1), state.range(2)}; assert(expectedValues.find(ranges) != expectedValues.end()); actualValues.insert(ranges); } // NOTE: This is not TearDown as we want to check after _all_ runs are // complete. 
~MultipleRangesFixture() override { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; } std::cout << "}\n"; } std::cout << "ACTUAL\n"; for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; } std::cout << "}\n"; } } } std::set> expectedValues; std::set> actualValues; }; BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) { for (auto _ : state) { int64_t product = state.range(0) * state.range(1) * state.range(2); for (int64_t x = 0; x < product; x++) { benchmark::DoNotOptimize(x); } } } BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty) ->RangeMultiplier(2) ->Ranges({{1, 2}, {3, 7}, {5, 15}}) ->Args({7, 6, 3}); void BM_CheckDefaultArgument(benchmark::State& state) { // Test that the 'range()' without an argument is the same as 'range(0)'. assert(state.range() == state.range(0)); assert(state.range() != state.range(1)); for (auto _ : state) { } } BENCHMARK(BM_CheckDefaultArgument)->Ranges({{1, 5}, {6, 10}}); void BM_MultipleRanges(benchmark::State& st) { for (auto _ : st) { } } BENCHMARK(BM_MultipleRanges)->Ranges({{5, 5}, {6, 6}}); } // end namespace BENCHMARK_MAIN(); ================================================ FILE: test/options_test.cc ================================================ #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/types.h" #if defined(NDEBUG) #undef NDEBUG #endif #include namespace { void BM_basic(benchmark::State& state) { for (auto _ : state) { } } void BM_basic_slow(benchmark::State& state) { std::chrono::milliseconds sleep_duration(state.range(0)); for (auto _ : state) { std::this_thread::sleep_for( std::chrono::duration_cast(sleep_duration)); } } BENCHMARK(BM_basic); BENCHMARK(BM_basic)->Arg(42); BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond); 
BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond); BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond); BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kSecond); BENCHMARK(BM_basic)->Range(1, 8); BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8); BENCHMARK(BM_basic)->DenseRange(10, 15); BENCHMARK(BM_basic)->Args({42, 42}); BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}}); BENCHMARK(BM_basic)->MinTime(0.7); BENCHMARK(BM_basic)->MinWarmUpTime(0.8); BENCHMARK(BM_basic)->MinTime(0.1)->MinWarmUpTime(0.2); BENCHMARK(BM_basic)->UseRealTime(); BENCHMARK(BM_basic)->ThreadRange(2, 4); BENCHMARK(BM_basic)->ThreadPerCpu(); BENCHMARK(BM_basic)->Repetitions(3); BENCHMARK(BM_basic) ->RangeMultiplier(std::numeric_limits::max()) ->Range(std::numeric_limits::min(), std::numeric_limits::max()); // Negative ranges BENCHMARK(BM_basic)->Range(-64, -1); BENCHMARK(BM_basic)->RangeMultiplier(4)->Range(-8, 8); BENCHMARK(BM_basic)->DenseRange(-2, 2, 1); BENCHMARK(BM_basic)->Ranges({{-64, 1}, {-8, -1}}); void CustomArgs(benchmark::Benchmark* b) { for (int i = 0; i < 10; ++i) { b->Arg(i); } } BENCHMARK(BM_basic)->Apply(CustomArgs); void BM_explicit_iteration_count(benchmark::State& state) { // Test that benchmarks specified with an explicit iteration count are // only run once. static bool invoked_before = false; assert(!invoked_before); invoked_before = true; // Test that the requested iteration count is respected. 
assert(state.max_iterations == 42); for (auto _ : state) { } assert(state.iterations() == state.max_iterations); assert(state.iterations() == 42); } BENCHMARK(BM_explicit_iteration_count)->Iterations(42); } // end namespace BENCHMARK_MAIN(); ================================================ FILE: test/output_test.h ================================================ #ifndef TEST_OUTPUT_TEST_H #define TEST_OUTPUT_TEST_H #undef NDEBUG #include #include #include #include #include #include #include #include "../src/re.h" #include "benchmark/benchmark_api.h" #define CONCAT2(x, y) x##y #define CONCAT(x, y) CONCAT2(x, y) #define ADD_CASES(...) \ const int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__) #define SET_SUBSTITUTIONS(...) \ const int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__) enum MatchRules : uint8_t { MR_Default, // Skip non-matching lines until a match is found. MR_Next, // Match must occur on the next line. MR_Not // No line between the current position and the next match matches // the regex }; struct TestCase { TestCase(std::string re, int rule = MR_Default); std::string regex_str; int match_rule; std::string substituted_regex; std::shared_ptr regex; }; enum TestCaseID : uint8_t { TC_ConsoleOut, TC_ConsoleErr, TC_JSONOut, TC_JSONErr, TC_CSVOut, TC_CSVErr, TC_NumID // PRIVATE }; // Add a list of test cases to be run against the output specified by // 'ID' int AddCases(TestCaseID ID, std::initializer_list il); // Add or set a list of substitutions to be performed on constructed regex's // See 'output_test_helper.cc' for a list of default substitutions. int SetSubstitutions( std::initializer_list> il); // Run all output tests. void RunOutputTests(int argc, char* argv[]); // Count the number of 'pat' substrings in the 'haystack' string. int SubstrCnt(const std::string& haystack, const std::string& pat); // Run registered benchmarks with file reporter enabled, and return the content // outputted by the file reporter. 
std::string GetFileReporterOutput(int argc, char* argv[]); // ========================================================================= // // ------------------------- Results checking ------------------------------ // // ========================================================================= // // Call this macro to register a benchmark for checking its results. This // should be all that's needed. It subscribes a function to check the (CSV) // results of a benchmark. This is done only after verifying that the output // strings are really as expected. // bm_name_pattern: a name or a regex pattern which will be matched against // all the benchmark names. Matching benchmarks // will be the subject of a call to checker_function // checker_function: should be of type ResultsCheckFn (see below) #define CHECK_BENCHMARK_RESULTS(bm_name_pattern, checker_function) \ const size_t CONCAT(dummy, __LINE__) = \ AddChecker(bm_name_pattern, checker_function) struct Results; typedef std::function ResultsCheckFn; size_t AddChecker(const std::string& bm_name_pattern, const ResultsCheckFn& fn); // Class holding the results of a benchmark. // It is passed in calls to checker functions. struct Results { // the benchmark name std::string name; // the benchmark fields std::map values; Results(const std::string& n) : name(n) {} int NumThreads() const; double NumIterations() const; typedef enum : uint8_t { kCpuTime, kRealTime } BenchmarkTime; // get cpu_time or real_time in seconds double GetTime(BenchmarkTime which) const; // get the real_time duration of the benchmark in seconds. // it is better to use fuzzy float checks for this, as the float // ASCII formatting is lossy. 
double DurationRealTime() const { return NumIterations() * GetTime(kRealTime); } // get the cpu_time duration of the benchmark in seconds double DurationCPUTime() const { return NumIterations() * GetTime(kCpuTime); } // get the string for a result by name, or nullptr if the name // is not found const std::string* Get(const std::string& entry_name) const { auto it = values.find(entry_name); if (it == values.end()) return nullptr; return &it->second; } // get a result by name, parsed as a specific type. // NOTE: for counters, use GetCounterAs instead. template T GetAs(const std::string& entry_name) const; // counters are written as doubles, so they have to be read first // as a double, and only then converted to the asked type. template T GetCounterAs(const std::string& entry_name) const { double dval = GetAs(entry_name); T tval = static_cast(dval); return tval; } }; template T Results::GetAs(const std::string& entry_name) const { auto* sv = Get(entry_name); BM_CHECK(sv != nullptr && !sv->empty()); std::stringstream ss; ss << *sv; T out; ss >> out; BM_CHECK(!ss.fail()); return out; } //---------------------------------- // Macros to help in result checking. Do not use them with arguments causing // side-effects. // clang-format off #define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \ CONCAT(BM_CHECK_, relationship) \ (entry.getfn< var_type >(var_name), (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ << "expected (" << #var_type << ")" << (var_name) \ << "=" << (entry).getfn< var_type >(var_name) \ << " to be " #relationship " to " << (value) << "\n" // check with tolerance. eps_factor is the tolerance window, which is // interpreted relative to value (eg, 0.1 means 10% of value). 
#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ CONCAT(BM_CHECK_FLOAT_, relationship) \ (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ << "expected (" << #var_type << ")" << (var_name) \ << "=" << (entry).getfn< var_type >(var_name) \ << " to be " #relationship " to " << (value) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " \ << "with tolerance of " << (eps_factor) * (value) \ << " (" << (eps_factor)*100. << "%), " \ << "but delta was " << ((entry).getfn< var_type >(var_name) - (value)) \ << " (" << (((entry).getfn< var_type >(var_name) - (value)) \ / \ ((value) > 1.e-5 || value < -1.e-5 ? value : 1.e-5)*100.) \ << "%)" #define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \ CHECK_RESULT_VALUE_IMPL(entry, GetAs, var_type, var_name, relationship, value) #define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \ CHECK_RESULT_VALUE_IMPL(entry, GetCounterAs, var_type, var_name, relationship, value) #define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \ CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetAs, double, var_name, relationship, value, eps_factor) #define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \ CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) // clang-format on // ========================================================================= // // --------------------------- Misc Utilities ------------------------------ // // ========================================================================= // namespace { const char* const dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; } // end namespace #endif // TEST_OUTPUT_TEST_H ================================================ FILE: test/output_test_helper.cc 
================================================ #include #include #include #include #include #include #include #include #include #include "../src/benchmark_api_internal.h" #include "../src/check.h" // NOTE: check.h is for internal use only! #include "../src/log.h" // NOTE: log.h is for internal use only #include "../src/re.h" // NOTE: re.h is for internal use only #include "output_test.h" // ========================================================================= // // ------------------------------ Internals -------------------------------- // // ========================================================================= // namespace internal { namespace { using TestCaseList = std::vector; // Use a vector because the order elements are added matters during iteration. // std::map/unordered_map don't guarantee that. // For example: // SetSubstitutions({{"%HelloWorld", "Hello"}, {"%Hello", "Hi"}}); // Substitute("%HelloWorld") // Always expands to Hello. using SubMap = std::vector>; TestCaseList& GetTestCaseList(TestCaseID ID) { // Uses function-local statics to ensure initialization occurs // before first use. static TestCaseList lists[TC_NumID]; return lists[ID]; } SubMap& GetSubstitutions() { // Don't use 'dec_re' from header because it may not yet be initialized. 
// clang-format off static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; static std::string time_re = "([0-9]+[.])?[0-9]+"; static std::string percentage_re = "[0-9]+[.][0-9]{2}"; static SubMap map = { {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, // human-readable float {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kKMGTPEZYmunpfazy]?i?"}, {"%percentage", percentage_re}, {"%int", "[ ]*[0-9]+"}, {" %s ", "[ ]+"}, {"%time", "[ ]*" + time_re + "[ ]+ns"}, {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, {"%console_percentage_report", "[ ]*" + percentage_re + "[ ]+% [ ]*" + percentage_re + "[ ]+% [ ]*[0-9]+"}, {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, {"%console_time_only_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns"}, {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, {"%console_us_time_only_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us"}, {"%csv_header", "name,iterations,real_time,cpu_time,time_unit,bytes_per_second," "items_per_second,label,error_occurred,error_message"}, {"%csv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,,,"}, {"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"}, {"%csv_ms_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ms,,,,,"}, {"%csv_s_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",s,,,,,"}, {"%csv_cv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",,,,,,"}, {"%csv_bytes_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"}, {"%csv_items_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,," + safe_dec_re + ",,,"}, {"%csv_bytes_items_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + 
",ns," + safe_dec_re + "," + safe_dec_re + ",,,"}, {"%csv_label_report_begin", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,"}, {"%csv_label_report_end", ",,"}}; // clang-format on return map; } std::string PerformSubstitutions(std::string source) { SubMap const& subs = GetSubstitutions(); using SizeT = std::string::size_type; for (auto const& KV : subs) { SizeT pos = 0; SizeT next_start = 0; while ((pos = source.find(KV.first, next_start)) != std::string::npos) { next_start = pos + KV.second.size(); source.replace(pos, KV.first.size(), KV.second); } } return source; } void CheckCase(std::stringstream& remaining_output, TestCase const& TC, TestCaseList const& not_checks) { std::string first_line; bool on_first = true; std::string line; while (!remaining_output.eof()) { BM_CHECK(remaining_output.good()); std::getline(remaining_output, line); if (on_first) { first_line = line; on_first = false; } for (const auto& NC : not_checks) { BM_CHECK(!NC.regex->Match(line)) << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" << NC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } if (TC.regex->Match(line)) { return; } BM_CHECK(TC.match_rule != MR_Next) << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } BM_CHECK(remaining_output.eof() == false) << "End of output reached before match for regex \"" << TC.regex_str << "\" was found" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } void CheckCases(TestCaseList const& checks, std::stringstream& output) { std::vector not_checks; for (size_t i = 0; i < checks.size(); ++i) { const auto& TC = checks[i]; if (TC.match_rule == MR_Not) { not_checks.push_back(TC); continue; } CheckCase(output, TC, not_checks); not_checks.clear(); } } 
class TestReporter : public benchmark::BenchmarkReporter { public: TestReporter(std::vector reps) : reporters_(std::move(reps)) {} bool ReportContext(const Context& context) override { bool last_ret = false; bool first = true; for (auto* rep : reporters_) { bool new_ret = rep->ReportContext(context); BM_CHECK(first || new_ret == last_ret) << "Reports return different values for ReportContext"; first = false; last_ret = new_ret; } (void)first; return last_ret; } void ReportRuns(const std::vector& report) override { for (auto* rep : reporters_) { rep->ReportRuns(report); } } void Finalize() override { for (auto* rep : reporters_) { rep->Finalize(); } } private: std::vector reporters_; }; } // namespace } // end namespace internal // ========================================================================= // // -------------------------- Results checking ----------------------------- // // ========================================================================= // namespace internal { // Utility class to manage subscribers for checking benchmark results. // It works by parsing the CSV output to read the results. 
class ResultsChecker { public: struct PatternAndFn : public TestCase { // reusing TestCase for its regexes PatternAndFn(const std::string& rx, ResultsCheckFn fn_) : TestCase(rx), fn(std::move(fn_)) {} ResultsCheckFn fn; }; std::vector check_patterns; std::vector results; std::vector field_names; void Add(const std::string& entry_pattern, const ResultsCheckFn& fn); void CheckResults(std::stringstream& output); private: void SetHeader_(const std::string& csv_header); void SetValues_(const std::string& entry_csv_line); std::vector SplitCsv_(const std::string& line) const; }; namespace { // store the static ResultsChecker in a function to prevent initialization // order problems ResultsChecker& GetResultsChecker() { static ResultsChecker rc; return rc; } } // end namespace // add a results checker for a benchmark void ResultsChecker::Add(const std::string& entry_pattern, const ResultsCheckFn& fn) { check_patterns.emplace_back(entry_pattern, fn); } // check the results of all subscribed benchmarks void ResultsChecker::CheckResults(std::stringstream& output) { // first reset the stream to the start { auto start = std::stringstream::pos_type(0); // clear before calling tellg() output.clear(); // seek to zero only when needed if (output.tellg() > start) { output.seekg(start); } // and just in case output.clear(); } // now go over every line and publish it to the ResultsChecker std::string line; bool on_first = true; while (!output.eof()) { BM_CHECK(output.good()); std::getline(output, line); if (on_first) { SetHeader_(line); // this is important on_first = false; continue; } SetValues_(line); } // finally we can call the subscribed check functions for (const auto& p : check_patterns) { BM_VLOG(2) << "--------------------------------\n"; BM_VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; for (const auto& r : results) { if (!p.regex->Match(r.name)) { BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; continue; } BM_VLOG(2) << 
p.regex_str << " is matched by " << r.name << "\n"; BM_VLOG(1) << "Checking results of " << r.name << ": ... \n"; p.fn(r); BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n"; } } } // prepare for the names in this header void ResultsChecker::SetHeader_(const std::string& csv_header) { field_names = SplitCsv_(csv_header); } // set the values for a benchmark void ResultsChecker::SetValues_(const std::string& entry_csv_line) { if (entry_csv_line.empty()) { return; } // some lines are empty BM_CHECK(!field_names.empty()); auto vals = SplitCsv_(entry_csv_line); BM_CHECK_EQ(vals.size(), field_names.size()); results.emplace_back(vals[0]); // vals[0] is the benchmark name auto& entry = results.back(); for (size_t i = 1, e = vals.size(); i < e; ++i) { entry.values[field_names[i]] = vals[i]; } } // a quick'n'dirty csv splitter (eliminating quotes) std::vector ResultsChecker::SplitCsv_( const std::string& line) const { std::vector out; if (line.empty()) { return out; } if (!field_names.empty()) { out.reserve(field_names.size()); } size_t prev = 0; size_t pos = line.find_first_of(','); size_t curr = pos; while (pos != std::string::npos) { BM_CHECK(curr > 0); if (line[prev] == '"') { ++prev; } if (line[curr - 1] == '"') { --curr; } out.push_back(line.substr(prev, curr - prev)); prev = pos + 1; pos = line.find_first_of(',', pos + 1); curr = pos; } curr = line.size(); if (line[prev] == '"') { ++prev; } if (line[curr - 1] == '"') { --curr; } out.push_back(line.substr(prev, curr - prev)); return out; } } // end namespace internal size_t AddChecker(const std::string& bm_name, const ResultsCheckFn& fn) { auto& rc = internal::GetResultsChecker(); rc.Add(bm_name, fn); return rc.results.size(); } int Results::NumThreads() const { auto pos = name.find("/threads:"); if (pos == std::string::npos) { return 1; } auto end = name.find('/', pos + 9); std::stringstream ss; ss << name.substr(pos + 9, end); int num = 1; ss >> num; BM_CHECK(!ss.fail()); return num; } double 
Results::NumIterations() const { return GetAs("iterations"); } double Results::GetTime(BenchmarkTime which) const { BM_CHECK(which == kCpuTime || which == kRealTime); const char* which_str = which == kCpuTime ? "cpu_time" : "real_time"; double val = GetAs(which_str); const auto* unit = Get("time_unit"); BM_CHECK(unit); if (*unit == "ns") { return val * 1.e-9; } if (*unit == "us") { return val * 1.e-6; } if (*unit == "ms") { return val * 1.e-3; } if (*unit == "s") { return val; } BM_CHECK(1 == 0) << "unknown time unit: " << *unit; return 0; } // ========================================================================= // // -------------------------- Public API Definitions------------------------ // // ========================================================================= // TestCase::TestCase(std::string re, int rule) : regex_str(std::move(re)), match_rule(rule), substituted_regex(internal::PerformSubstitutions(regex_str)), regex(std::make_shared()) { std::string err_str; regex->Init(substituted_regex, &err_str); BM_CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex << "\"" << "\n originally \"" << regex_str << "\"" << "\n got error: " << err_str; } int AddCases(TestCaseID ID, std::initializer_list il) { auto& L = internal::GetTestCaseList(ID); L.insert(L.end(), il); return 0; } int SetSubstitutions( std::initializer_list> il) { auto& subs = internal::GetSubstitutions(); for (auto KV : il) { bool exists = false; KV.second = internal::PerformSubstitutions(KV.second); for (auto& EKV : subs) { if (EKV.first == KV.first) { EKV.second = std::move(KV.second); exists = true; break; } } if (!exists) { subs.push_back(std::move(KV)); } } return 0; } // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations BENCHMARK_DISABLE_DEPRECATED_WARNING void RunOutputTests(int argc, char* argv[]) { using internal::GetTestCaseList; benchmark::Initialize(&argc, argv); 
auto options = benchmark::internal::GetOutputOptions(/*force_no_color*/ true); benchmark::ConsoleReporter CR(options); benchmark::JSONReporter JR; benchmark::CSVReporter CSVR; struct ReporterTest { std::string name; std::vector& output_cases; std::vector& error_cases; benchmark::BenchmarkReporter& reporter; std::stringstream out_stream; std::stringstream err_stream; ReporterTest(const std::string& n, std::vector& out_tc, std::vector& err_tc, benchmark::BenchmarkReporter& br) : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { reporter.SetOutputStream(&out_stream); reporter.SetErrorStream(&err_stream); } } TestCases[] = { {std::string("ConsoleReporter"), GetTestCaseList(TC_ConsoleOut), GetTestCaseList(TC_ConsoleErr), CR}, {std::string("JSONReporter"), GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr), JR}, {std::string("CSVReporter"), GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr), CSVR}, }; // Create the test reporter and run the benchmarks. std::cout << "Running benchmarks...\n"; internal::TestReporter test_rep({&CR, &JR, &CSVR}); benchmark::RunSpecifiedBenchmarks(&test_rep); for (auto& rep_test : TestCases) { std::string msg = std::string("\nTesting ") + rep_test.name + std::string(" Output\n"); std::string banner(msg.size() - 1, '-'); std::cout << banner << msg << banner << "\n"; std::cerr << rep_test.err_stream.str(); std::cout << rep_test.out_stream.str(); internal::CheckCases(rep_test.error_cases, rep_test.err_stream); internal::CheckCases(rep_test.output_cases, rep_test.out_stream); std::cout << "\n"; } // now that we know the output is as expected, we can dispatch // the checks to subscribees. 
auto& csv = TestCases[2]; // would use == but gcc spits a warning BM_CHECK(csv.name == std::string("CSVReporter")); internal::GetResultsChecker().CheckResults(csv.out_stream); } BENCHMARK_RESTORE_DEPRECATED_WARNING int SubstrCnt(const std::string& haystack, const std::string& pat) { if (pat.length() == 0) { return 0; } int count = 0; for (size_t offset = haystack.find(pat); offset != std::string::npos; offset = haystack.find(pat, offset + pat.length())) { ++count; } return count; } namespace { char ToHex(int ch) { return ch < 10 ? static_cast('0' + ch) : static_cast('a' + (ch - 10)); } char RandomHexChar() { static std::mt19937 rd{std::random_device{}()}; static std::uniform_int_distribution mrand{0, 15}; return ToHex(mrand(rd)); } std::string GetRandomFileName() { std::string model = "test.%%%%%%"; for (auto& ch : model) { if (ch == '%') { ch = RandomHexChar(); } } return model; } bool FileExists(std::string const& name) { std::ifstream in(name.c_str()); return in.good(); } std::string GetTempFileName() { // This function attempts to avoid race conditions where two tests // create the same file at the same time. However, it still introduces races // similar to tmpnam. int retries = 3; while (--retries != 0) { std::string name = GetRandomFileName(); if (!FileExists(name)) { return name; } } std::cerr << "Failed to create unique temporary file name\n"; std::flush(std::cerr); std::exit(1); } } // end namespace std::string GetFileReporterOutput(int argc, char* argv[]) { std::vector new_argv(argv, argv + argc); assert(static_cast(argc) == new_argv.size()); std::string tmp_file_name = GetTempFileName(); std::cout << "Will be using this as the tmp file: " << tmp_file_name << '\n'; std::string tmp = "--benchmark_out="; tmp += tmp_file_name; new_argv.emplace_back(const_cast(tmp.c_str())); argc = static_cast(new_argv.size()); benchmark::Initialize(&argc, new_argv.data()); benchmark::RunSpecifiedBenchmarks(); // Read the output back from the file, and delete the file. 
std::ifstream tmp_stream(tmp_file_name); std::string output = std::string((std::istreambuf_iterator(tmp_stream)), std::istreambuf_iterator()); std::remove(tmp_file_name.c_str()); return output; } ================================================ FILE: test/overload_test.cc ================================================ #include "benchmark/macros.h" #include "benchmark/registration.h" #include "benchmark/state.h" namespace { // Simulate an overloaded function name. // This version does nothing and is just here to create ambiguity for // MyOverloadedBenchmark. BENCHMARK_UNUSED void MyOverloadedBenchmark() {} // This is the actual benchmark function we want to register. // It has the signature void(benchmark::State&) required by the library. void MyOverloadedBenchmark(benchmark::State& state) { for (auto _ : state) { } } // This macro invocation should compile correctly if benchmark.h // contains the fix (using static_cast), but would fail to compile // if the benchmark name were ambiguous (e.g., when using + or no cast // with an overloaded function). BENCHMARK(MyOverloadedBenchmark); // Also test BENCHMARK_TEMPLATE with an overloaded name. 
template void MyTemplatedOverloadedBenchmark() {} template void MyTemplatedOverloadedBenchmark(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK_TEMPLATE(MyTemplatedOverloadedBenchmark, 1); } // end namespace BENCHMARK_MAIN(); ================================================ FILE: test/perf_counters_gtest.cc ================================================ #include #include #include "../src/perf_counters.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #ifndef GTEST_SKIP struct MsgHandler { void operator=(std::ostream&) {} }; #define GTEST_SKIP() return MsgHandler() = std::cout #endif using benchmark::internal::PerfCounters; using benchmark::internal::PerfCountersMeasurement; using benchmark::internal::PerfCounterValues; using ::testing::AllOf; using ::testing::Gt; using ::testing::Lt; namespace { const char kGenericPerfEvent1[] = "CYCLES"; const char kGenericPerfEvent2[] = "INSTRUCTIONS"; TEST(PerfCountersTest, Init) { EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported); } // Generic events will have as many counters as there are CPU PMUs, and each // will have the same name. In order to make these tests independent of the // number of CPU PMUs in the system, we uniquify the counter names before // testing them. 
static std::set UniqueCounterNames(const PerfCounters& pc) { std::set names{pc.names().begin(), pc.names().end()}; return names; } TEST(PerfCountersTest, OneCounter) { if (!PerfCounters::kSupported) { GTEST_SKIP() << "Performance counters not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); EXPECT_EQ( UniqueCounterNames(PerfCounters::Create({kGenericPerfEvent1})).size(), 1); } TEST(PerfCountersTest, NegativeTest) { if (!PerfCounters::kSupported) { EXPECT_FALSE(PerfCounters::Initialize()); return; } EXPECT_TRUE(PerfCounters::Initialize()); // Safety checks // Create() will always create a valid object, even if passed no or // wrong arguments as the new behavior is to warn and drop unsupported // counters EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0); EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0); EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0); { // Try sneaking in a bad egg to see if it is filtered out. The // number of counters has to be two, not zero auto counter = PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}); auto names = UniqueCounterNames(counter); EXPECT_EQ(names.size(), 2); EXPECT_EQ(names, std::set({kGenericPerfEvent2, kGenericPerfEvent1})); } { // Try sneaking in an outrageous counter, like a fat finger mistake auto counter = PerfCounters::Create( {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1}); auto names = UniqueCounterNames(counter); EXPECT_EQ(names.size(), 2); EXPECT_EQ(names, std::set({kGenericPerfEvent2, kGenericPerfEvent1})); } { // Finally try a golden input - it should like both of them EXPECT_EQ(UniqueCounterNames(PerfCounters::Create( {kGenericPerfEvent1, kGenericPerfEvent2})) .size(), 2); } { // Add a bad apple in the end of the chain to check the edges auto counter = PerfCounters::Create( {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"}); auto names = UniqueCounterNames(counter); EXPECT_EQ(names.size(), 2); EXPECT_EQ(names, 
std::set({kGenericPerfEvent1, kGenericPerfEvent2})); } } static std::map SnapshotAndCombine( PerfCounters& counters) { PerfCounterValues values(counters.num_counters()); std::map value_map; if (counters.Snapshot(&values)) { for (size_t i = 0; i != counters.num_counters(); ++i) { value_map[counters.names()[i]] += values[i]; } } return value_map; } TEST(PerfCountersTest, Read1Counter) { if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); auto counters = PerfCounters::Create({kGenericPerfEvent1}); auto values1 = SnapshotAndCombine(counters); EXPECT_EQ(values1.size(), 1); EXPECT_GT(values1.begin()->second, 0); auto values2 = SnapshotAndCombine(counters); EXPECT_EQ(values2.size(), 1); EXPECT_GT(values2.begin()->second, 0); EXPECT_GT(values2.begin()->second, values1.begin()->second); } TEST(PerfCountersTest, Read1CounterEachCPU) { if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; } #ifdef __linux__ EXPECT_TRUE(PerfCounters::Initialize()); cpu_set_t saved_set; if (sched_getaffinity(0, sizeof(saved_set), &saved_set) != 0) { // This can happen e.g. if there are more than CPU_SETSIZE CPUs. 
GTEST_SKIP() << "Could not save CPU affinity mask.\n"; } for (size_t cpu = 0; cpu != CPU_SETSIZE; ++cpu) { cpu_set_t set; CPU_ZERO(&set); CPU_SET(cpu, &set); if (sched_setaffinity(0, sizeof(set), &set) != 0) { break; } auto counters = PerfCounters::Create({kGenericPerfEvent1}); auto values1 = SnapshotAndCombine(counters); EXPECT_EQ(values1.size(), 1); EXPECT_GT(values1.begin()->second, 0); auto values2 = SnapshotAndCombine(counters); EXPECT_EQ(values2.size(), 1); EXPECT_GT(values2.begin()->second, 0); EXPECT_GT(values2.begin()->second, values1.begin()->second); } EXPECT_EQ(sched_setaffinity(0, sizeof(saved_set), &saved_set), 0); #else GTEST_SKIP() << "Test skipped on non-Linux.\n"; #endif } TEST(PerfCountersTest, Read2Counters) { if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); auto counters = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); auto values1 = SnapshotAndCombine(counters); EXPECT_EQ(values1.size(), 2); for (auto& kv : values1) { EXPECT_GT(kv.second, 0); } auto values2 = SnapshotAndCombine(counters); EXPECT_EQ(values1.size(), 2); for (auto& kv : values2) { EXPECT_GT(kv.second, 0); EXPECT_GT(kv.second, values1[kv.first]); } } TEST(PerfCountersTest, ReopenExistingCounters) { // This test works in recent and old Intel hardware, Pixel 3, and Pixel 6. // However we cannot make assumptions beyond 2 HW counters due to Pixel 6. if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); std::vector kMetrics({kGenericPerfEvent1}); std::vector counters(2); for (auto& counter : counters) { counter = PerfCounters::Create(kMetrics); } PerfCounterValues values(counters[0].num_counters()); EXPECT_TRUE(counters[0].Snapshot(&values)); EXPECT_TRUE(counters[1].Snapshot(&values)); } TEST(PerfCountersTest, CreateExistingMeasurements) { // The test works (i.e. 
causes read to fail) for the assumptions // about hardware capabilities (i.e. small number (2) hardware // counters) at this date, // the same as previous test ReopenExistingCounters. if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); // This means we will try 10 counters but we can only guarantee // for sure at this time that only 3 will work. Perhaps in the future // we could use libpfm to query for the hardware limits on this // particular platform. const int kMaxCounters = 10; const int kMinValidCounters = 2; // Let's use a ubiquitous counter that is guaranteed to work // on all platforms const std::vector kMetrics{"cycles"}; // Cannot create a vector of actual objects because the // copy constructor of PerfCounters is deleted - and so is // implicitly deleted on PerfCountersMeasurement too std::vector> perf_counter_measurements; perf_counter_measurements.reserve(kMaxCounters); for (int j = 0; j < kMaxCounters; ++j) { perf_counter_measurements.emplace_back( new PerfCountersMeasurement(kMetrics)); } std::vector> measurements; // Start all counters together to see if they hold size_t max_counters = kMaxCounters; for (size_t i = 0; i < kMaxCounters; ++i) { auto& counter(*perf_counter_measurements[i]); std::set names{counter.names().begin(), counter.names().end()}; EXPECT_EQ(names.size(), 1); if (!counter.Start()) { max_counters = i; break; }; } ASSERT_GE(max_counters, kMinValidCounters); // Start all together for (size_t i = 0; i < max_counters; ++i) { auto& counter(*perf_counter_measurements[i]); EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters)); } // Start/stop individually for (size_t i = 0; i < max_counters; ++i) { auto& counter(*perf_counter_measurements[i]); measurements.clear(); counter.Start(); EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters)); } } // We try to do some meaningful work here but the compiler // insists in optimizing 
away our loop so we had to add a // no-optimize macro. In case it fails, we added some entropy // to this pool as well. BENCHMARK_DONT_OPTIMIZE size_t do_work() { static std::mt19937 rd{std::random_device{}()}; static std::uniform_int_distribution mrand(0, 10); const size_t kNumLoops = 1000000; size_t sum = 0; for (size_t j = 0; j < kNumLoops; ++j) { sum += mrand(rd); } benchmark::DoNotOptimize(sum); return sum; } void measure(size_t threadcount, std::map* before, std::map* after) { BM_CHECK_NE(before, nullptr); BM_CHECK_NE(after, nullptr); std::vector threads(threadcount); auto work = [&]() { BM_CHECK(do_work() > 1000); }; // We need to first set up the counters, then start the threads, so the // threads would inherit the counters. But later, we need to first destroy // the thread pool (so all the work finishes), then measure the counters. So // the scopes overlap, and we need to explicitly control the scope of the // threadpool. auto counters = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); for (auto& t : threads) { t = std::thread(work); } *before = SnapshotAndCombine(counters); for (auto& t : threads) { t.join(); } *after = SnapshotAndCombine(counters); } TEST(PerfCountersTest, MultiThreaded) { if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported."; } EXPECT_TRUE(PerfCounters::Initialize()); std::map before, after; // Notice that this test will work even if we taskset it to a single CPU // In this case the threads will run sequentially // Start two threads and measure the number of combined cycles and // instructions measure(2, &before, &after); std::vector Elapsed2Threads{ static_cast(after[kGenericPerfEvent1] - before[kGenericPerfEvent1]), static_cast(after[kGenericPerfEvent2] - before[kGenericPerfEvent2])}; // Start four threads and measure the number of combined cycles and // instructions measure(4, &before, &after); std::vector Elapsed4Threads{ static_cast(after[kGenericPerfEvent1] - 
before[kGenericPerfEvent1]), static_cast(after[kGenericPerfEvent2] - before[kGenericPerfEvent2])}; // The following expectations fail (at least on a beefy workstation with lots // of cpus) - it seems that in some circumstances the runtime of 4 threads // can even be better than with 2. // So instead of expecting 4 threads to be slower, let's just make sure they // do not differ too much in general (one is not more than 10x than the // other). EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10))); EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10))); } TEST(PerfCountersTest, HardwareLimits) { // The test works (i.e. causes read to fail) for the assumptions // about hardware capabilities (i.e. small number (3-4) hardware // counters) at this date, // the same as previous test ReopenExistingCounters. if (!PerfCounters::kSupported) { GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; } EXPECT_TRUE(PerfCounters::Initialize()); // Taken from `perf list`, but focusses only on those HW events that actually // were reported when running `sudo perf stat -a sleep 10`, intersected over // several platforms. All HW events listed in the first command not reported // in the second seem to not work. This is sad as we don't really get to test // the grouping here (groups can contain up to 6 members)... 
std::vector counter_names{ "cycles", // leader "instructions", // "branch-misses", // }; // In the off-chance that some of these values are not supported, // we filter them out so the test will complete without failure // albeit it might not actually test the grouping on that platform std::vector valid_names; for (const std::string& name : counter_names) { if (PerfCounters::IsCounterSupported(name)) { valid_names.push_back(name); } } PerfCountersMeasurement counter(valid_names); std::vector> measurements; counter.Start(); EXPECT_TRUE(counter.Stop(measurements)); } } // namespace ================================================ FILE: test/perf_counters_test.cc ================================================ #include #undef NDEBUG #include "../src/commandlineflags.h" #include "../src/perf_counters.h" #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #include "output_test.h" namespace benchmark { BM_DECLARE_string(benchmark_perf_counters); } // namespace benchmark namespace { void BM_Simple(benchmark::State& state) { for (auto _ : state) { auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_Simple); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); const int kIters = 1000000; void BM_WithoutPauseResume(benchmark::State& state) { int n = 0; for (auto _ : state) { for (auto i = 0; i < kIters; ++i) { n = 1 - n; benchmark::DoNotOptimize(n); } } } BENCHMARK(BM_WithoutPauseResume); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithoutPauseResume\",$"}}); void BM_WithPauseResume(benchmark::State& state) { int m = 0, n = 0; for (auto _ : state) { for (auto i = 0; i < kIters; ++i) { n = 1 - n; benchmark::DoNotOptimize(n); } state.PauseTiming(); for (auto j = 0; j < kIters; ++j) { m = 1 - m; benchmark::DoNotOptimize(m); } state.ResumeTiming(); } } BENCHMARK(BM_WithPauseResume); ADD_CASES(TC_JSONOut, {{"\"name\": 
\"BM_WithPauseResume\",$"}}); static void CheckSimple(Results const& e) { CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); } double withoutPauseResumeInstrCount = 0.0; double withPauseResumeInstrCount = 0.0; void SaveInstrCountWithoutResume(Results const& e) { withoutPauseResumeInstrCount = e.GetAs("INSTRUCTIONS"); } void SaveInstrCountWithResume(Results const& e) { withPauseResumeInstrCount = e.GetAs("INSTRUCTIONS"); } CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &SaveInstrCountWithoutResume); CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &SaveInstrCountWithResume); } // end namespace int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); if (!benchmark::internal::PerfCounters::kSupported) { return 0; } benchmark::FLAGS_benchmark_perf_counters = "CYCLES,INSTRUCTIONS"; benchmark::internal::PerfCounters::Initialize(); RunOutputTests(argc, argv); BM_CHECK_GT(withPauseResumeInstrCount, kIters); BM_CHECK_GT(withoutPauseResumeInstrCount, kIters); BM_CHECK_LT(withPauseResumeInstrCount, 1.5 * withoutPauseResumeInstrCount); } ================================================ FILE: test/profiler_manager_gtest.cc ================================================ #include #include "benchmark/managers.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #include "gtest/gtest.h" namespace { class TestProfilerManager : public benchmark::ProfilerManager { public: void AfterSetupStart() override { ++start_called; } void BeforeTeardownStop() override { ++stop_called; } int start_called = 0; int stop_called = 0; }; void BM_empty(benchmark::State& state) { for (auto _ : state) { auto iterations = state.iterations(); benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); TEST(ProfilerManager, ReregisterManager) { #if GTEST_HAS_DEATH_TEST // Tests only runnable in debug mode (when BM_CHECK is enabled). 
#ifndef NDEBUG #ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS ASSERT_DEATH_IF_SUPPORTED( { std::unique_ptr pm(new TestProfilerManager()); benchmark::RegisterProfilerManager(pm.get()); benchmark::RegisterProfilerManager(pm.get()); }, "RegisterProfilerManager"); #endif #endif #endif } } // namespace ================================================ FILE: test/profiler_manager_iterations_test.cc ================================================ #include #include #include #include #include "benchmark/benchmark_api.h" #include "benchmark/managers.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" // Tests that we can specify the number of profiler iterations with // --benchmark_min_time=x. namespace { int iteration_count = 0; int end_profiler_iteration_count = 0; class TestProfilerManager : public benchmark::ProfilerManager { void AfterSetupStart() override { iteration_count = 0; } void BeforeTeardownStop() override { end_profiler_iteration_count = iteration_count; } }; class NullReporter : public benchmark::BenchmarkReporter { public: bool ReportContext(const Context& /*context*/) override { return true; } void ReportRuns(const std::vector& /* report */) override {} }; void BM_MyBench(benchmark::State& state) { for (auto s : state) { ++iteration_count; } } BENCHMARK(BM_MyBench); } // end namespace int main(int argc, char** argv) { benchmark::MaybeReenterWithoutASLR(argc, argv); // Make a fake argv and append the new --benchmark_profiler_iterations= // to it. 
int fake_argc = argc + 1; std::vector fake_argv(static_cast(fake_argc)); for (size_t i = 0; i < static_cast(argc); ++i) { fake_argv[i] = argv[i]; } fake_argv[static_cast(argc)] = "--benchmark_min_time=4x"; std::unique_ptr pm(new TestProfilerManager()); benchmark::RegisterProfilerManager(pm.get()); benchmark::Initialize(&fake_argc, const_cast(fake_argv.data())); NullReporter null_reporter; const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&null_reporter, "BM_MyBench"); assert(returned_count == 1); // Check the executed iters. assert(end_profiler_iteration_count == 4); benchmark::RegisterProfilerManager(nullptr); return 0; } ================================================ FILE: test/profiler_manager_test.cc ================================================ // FIXME: WIP #include #include #include "benchmark/benchmark_api.h" #include "benchmark/managers.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #include "output_test.h" namespace { class TestProfilerManager : public benchmark::ProfilerManager { public: void AfterSetupStart() override { ++start_called; } void BeforeTeardownStop() override { ++stop_called; } int start_called = 0; int stop_called = 0; }; void BM_empty(benchmark::State& state) { for (auto _ : state) { auto iterations = state.iterations(); benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_empty\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, 
{{"^\"BM_empty\",%csv_report$"}}); } // end namespace int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); std::unique_ptr pm(new TestProfilerManager()); benchmark::RegisterProfilerManager(pm.get()); RunOutputTests(argc, argv); benchmark::RegisterProfilerManager(nullptr); assert(pm->start_called == 1); assert(pm->stop_called == 1); } ================================================ FILE: test/register_benchmark_test.cc ================================================ #undef NDEBUG #include #include #include "../src/check.h" // NOTE: check.h is for internal use only! #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/reporter.h" #include "benchmark/state.h" namespace { class TestReporter : public benchmark::ConsoleReporter { public: void ReportRuns(const std::vector& report) override { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } std::vector all_runs_; }; struct TestCase { const std::string name; const std::string label; // Note: not explicit as we rely on it being converted through ADD_CASES. TestCase(const std::string& xname) : TestCase(xname, "") {} TestCase(const std::string& xname, const std::string& xlabel) : name(xname), label(xlabel) {} typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { // clang-format off BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); if (!label.empty()) { BM_CHECK(run.report_label == label) << "expected " << label << " got " << run.report_label; } else { BM_CHECK(run.report_label.empty()); } // clang-format on } }; std::vector ExpectedResults; int AddCases(std::initializer_list const& v) { for (const auto& N : v) { ExpectedResults.push_back(N); } return 0; } #define CONCAT(x, y) CONCAT2(x, y) #define CONCAT2(x, y) x##y #define ADD_CASES(...) 
\ const int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__}) using ReturnVal = benchmark::Benchmark const* const; //----------------------------------------------------------------------------// // Test RegisterBenchmark with no additional arguments //----------------------------------------------------------------------------// void BM_function(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_function); ReturnVal dummy = benchmark::RegisterBenchmark( "BM_function_manual_registration", BM_function); ADD_CASES({"BM_function"}, {"BM_function_manual_registration"}); //----------------------------------------------------------------------------// // Test RegisterBenchmark with additional arguments // Note: GCC <= 4.8 do not support this form of RegisterBenchmark because they // reject the variadic pack expansion of lambda captures. //----------------------------------------------------------------------------// void BM_extra_args(benchmark::State& st, const char* label) { for (auto _ : st) { } st.SetLabel(label); } int RegisterFromFunction() { std::pair cases[] = { {"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}}; for (auto const& c : cases) { benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second); } return 0; } const int dummy2 = RegisterFromFunction(); ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}); //----------------------------------------------------------------------------// // Test RegisterBenchmark with DISABLED_ benchmark //----------------------------------------------------------------------------// void DISABLED_BM_function(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(DISABLED_BM_function); ReturnVal dummy3 = benchmark::RegisterBenchmark("DISABLED_BM_function_manual", DISABLED_BM_function); // No need to add cases because we don't expect them to run. 
//----------------------------------------------------------------------------// // Test BENCHMARK_NAMED: verifies name format "func/test_case_name" and that // chaining (e.g. ->Threads()) works, without introducing a lambda. //----------------------------------------------------------------------------// void BM_named(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK_NAMED(BM_named, variant_a); BENCHMARK_NAMED(BM_named, variant_b); BENCHMARK_NAMED(BM_named, variant_c)->Threads(2); ADD_CASES({"BM_named/variant_a"}, {"BM_named/variant_b"}, {"BM_named/variant_c/threads:2"}); //----------------------------------------------------------------------------// // Test RegisterBenchmark with different callable types //----------------------------------------------------------------------------// struct CustomFixture { void operator()(benchmark::State& st) { for (auto _ : st) { } } }; void TestRegistrationAtRuntime() { { CustomFixture fx; benchmark::RegisterBenchmark("custom_fixture", fx); AddCases({std::string("custom_fixture")}); } { const char* x = "42"; auto capturing_lam = [=](benchmark::State& st) { for (auto _ : st) { } st.SetLabel(x); }; benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam); AddCases({{"lambda_benchmark", x}}); } } // Test that all benchmarks, registered at either during static init or runtime, // are run and the results are passed to the reported. void RunTestOne() { TestRegistrationAtRuntime(); TestReporter test_reporter; benchmark::RunSpecifiedBenchmarks(&test_reporter); typedef benchmark::BenchmarkReporter::Run Run; auto EB = ExpectedResults.begin(); for (Run const& run : test_reporter.all_runs_) { assert(EB != ExpectedResults.end()); EB->CheckRun(run); ++EB; } assert(EB == ExpectedResults.end()); } // Test that ClearRegisteredBenchmarks() clears all previously registered // benchmarks. // Also test that new benchmarks can be registered and ran afterwards. 
void RunTestTwo() { assert(!ExpectedResults.empty() && "must have at least one registered benchmark"); ExpectedResults.clear(); benchmark::ClearRegisteredBenchmarks(); TestReporter test_reporter; size_t num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter); assert(num_ran == 0); assert(test_reporter.all_runs_.begin() == test_reporter.all_runs_.end()); TestRegistrationAtRuntime(); num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter); assert(num_ran == ExpectedResults.size()); typedef benchmark::BenchmarkReporter::Run Run; auto EB = ExpectedResults.begin(); for (Run const& run : test_reporter.all_runs_) { assert(EB != ExpectedResults.end()); EB->CheckRun(run); ++EB; } assert(EB == ExpectedResults.end()); } } // end namespace int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); benchmark::Initialize(&argc, argv); RunTestOne(); RunTestTwo(); } ================================================ FILE: test/repetitions_test.cc ================================================ #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "output_test.h" namespace { // ========================================================================= // // ------------------------ Testing Basic Output --------------------------- // // ========================================================================= // void BM_ExplicitRepetitions(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_ExplicitRepetitions)->Repetitions(2); ADD_CASES(TC_ConsoleOut, {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ExplicitRepetitions/repeats:2_mean %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ExplicitRepetitions/repeats:2_median %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ExplicitRepetitions/repeats:2_stddev %console_report$"}}); ADD_CASES(TC_JSONOut, 
{{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_mean\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_median\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, 
{"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_stddev\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2_mean\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2_median\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2_stddev\",%csv_report$"}}); // ========================================================================= // // ------------------------ Testing Basic Output --------------------------- // // ========================================================================= // void BM_ImplicitRepetitions(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_ImplicitRepetitions); ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_mean %console_report$"}}); ADD_CASES(TC_ConsoleOut, 
{{"^BM_ImplicitRepetitions_median %console_report$"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_stddev %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", 
MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_mean\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_median\",%csv_report$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_stddev\",%csv_report$"}}); } // end namespace // ========================================================================= // // --------------------------- TEST CASES END 
------------------------------ // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/report_aggregates_only_test.cc ================================================ #undef NDEBUG #include #include #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "output_test.h" namespace { // Ok this test is super ugly. We want to check what happens with the file // reporter in the presence of ReportAggregatesOnly(). // We do not care about console output, the normal tests check that already. void BM_SummaryRepeat(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); } // end namespace int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); const std::string output = GetFileReporterOutput(argc, argv); if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 4 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { std::cout << "Precondition mismatch. 
Expected to only find four " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; } return 0; } ================================================ FILE: test/reporter_output_test.cc ================================================ #undef NDEBUG #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/statistics.h" #include "benchmark/sysinfo.h" #include "benchmark/types.h" #include "benchmark/utils.h" #include "output_test.h" namespace { // ========================================================================= // // ---------------------- Testing Prologue Output -------------------------- // // ========================================================================= // ADD_CASES(TC_ConsoleOut, {{"^[-]+$", MR_Next}, {"^Benchmark %s Time %s CPU %s Iterations$", MR_Next}, {"^[-]+$", MR_Next}}); int AddContextCases() { AddCases(TC_ConsoleErr, { {"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default}, {"Running .*(/|\\\\)reporter_output_test(\\.exe)?$", MR_Next}, {"Run on \\(%int X %float MHz CPU s?\\)", MR_Next}, }); AddCases(TC_JSONOut, {{"^\\{", MR_Default}, {"\"context\":", MR_Next}, {"\"date\": \"", MR_Next}, {"\"host_name\":", MR_Next}, {"\"executable\": \".*(/|\\\\)reporter_output_test(\\.exe)?\",", MR_Next}, {"\"num_cpus\": %int,$", MR_Next}, {"\"mhz_per_cpu\": %float,$", MR_Next}, {"\"caches\": \\[$", MR_Default}}); auto const& Info = benchmark::CPUInfo::Get(); auto const& Caches = Info.caches; if (!Caches.empty()) { AddCases(TC_ConsoleErr, {{"CPU Caches:$", MR_Next}}); } for (size_t I = 0; I < Caches.size(); ++I) { std::string num_caches_str = Caches[I].num_sharing != 0 ? 
" \\(x%int\\)$" : "$"; AddCases(TC_ConsoleErr, {{"L%int (Data|Instruction|Unified) %int KiB" + num_caches_str, MR_Next}}); AddCases(TC_JSONOut, {{"\\{$", MR_Next}, {"\"type\": \"", MR_Next}, {"\"level\": %int,$", MR_Next}, {"\"size\": %int,$", MR_Next}, {"\"num_sharing\": %int$", MR_Next}, {"}[,]{0,1}$", MR_Next}}); } AddCases(TC_JSONOut, {{"],$"}}); auto const& LoadAvg = Info.load_avg; if (!LoadAvg.empty()) { AddCases(TC_ConsoleErr, {{"Load Average: (%float, ){0,2}%float$", MR_Next}}); } AddCases(TC_JSONOut, {{"\"load_avg\": \\[(%float,?){0,3}],$", MR_Next}}); AddCases(TC_JSONOut, {{"\"library_version\": \".*\",$", MR_Next}}); AddCases(TC_JSONOut, {{"\"library_build_type\": \".*\",$", MR_Next}}); AddCases(TC_JSONOut, {{"\"json_schema_version\": 1$", MR_Next}}); return 0; } const int dummy_register = AddContextCases(); ADD_CASES(TC_CSVOut, {{"%csv_header"}}); // ========================================================================= // // ------------------------ Testing Basic Output --------------------------- // // ========================================================================= // void BM_basic(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_basic); ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_basic\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}}); // ========================================================================= // // ------------------------ Testing Bytes per Second Output ---------------- // // 
========================================================================= // void BM_bytes_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } state.SetBytesProcessed(1); } BENCHMARK(BM_bytes_per_second); ADD_CASES(TC_ConsoleOut, {{"^BM_bytes_per_second %console_report " "bytes_per_second=%float[kM]{0,1}/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_bytes_per_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bytes_per_second\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}}); // ========================================================================= // // ------------------------ Testing Items per Second Output ---------------- // // ========================================================================= // void BM_items_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } state.SetItemsProcessed(1); } BENCHMARK(BM_items_per_second); ADD_CASES(TC_ConsoleOut, {{"^BM_items_per_second %console_report " "items_per_second=%float[kM]{0,1}/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"}, {"\"family_index\": 2,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": 
\"BM_items_per_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"items_per_second\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_items_per_second\",%csv_items_report$"}}); // ========================================================================= // // ------------------------ Testing Label Output --------------------------- // // ========================================================================= // void BM_label(benchmark::State& state) { for (auto _ : state) { } state.SetLabel("some label"); } BENCHMARK(BM_label); ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, {"\"family_index\": 3,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_label\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"label\": \"some label\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_label\",%csv_label_report_begin\"some " "label\"%csv_label_report_end$"}}); // ========================================================================= // // ------------------------ Testing Time Label Output ---------------------- // // ========================================================================= // void BM_time_label_nanosecond(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_time_label_nanosecond)->Unit(benchmark::kNanosecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_nanosecond 
%console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_nanosecond\",$"}, {"\"family_index\": 4,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_nanosecond\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_nanosecond\",%csv_report$"}}); void BM_time_label_microsecond(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_time_label_microsecond)->Unit(benchmark::kMicrosecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_microsecond %console_us_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_microsecond\",$"}, {"\"family_index\": 5,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_microsecond\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"us\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_microsecond\",%csv_us_report$"}}); void BM_time_label_millisecond(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_time_label_millisecond)->Unit(benchmark::kMillisecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_millisecond %console_ms_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_millisecond\",$"}, {"\"family_index\": 6,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_millisecond\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, 
{"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ms\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_millisecond\",%csv_ms_report$"}}); void BM_time_label_second(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_time_label_second)->Unit(benchmark::kSecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_second %console_s_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_second\",$"}, {"\"family_index\": 7,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"s\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_second\",%csv_s_report$"}}); // ========================================================================= // // ------------------------ Testing Error Output --------------------------- // // ========================================================================= // void BM_error(benchmark::State& state) { state.SkipWithError("message"); for (auto _ : state) { } } BENCHMARK(BM_error); ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"}, {"\"family_index\": 8,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_error\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"error_occurred\": true,$", MR_Next}, {"\"error_message\": \"message\",$", MR_Next}}); ADD_CASES(TC_CSVOut, 
{{"^\"BM_error\",,,,,,,,true,\"message\"$"}}); // ========================================================================= // // ------------------------ Testing No Arg Name Output ----------------------- // // // ========================================================================= // void BM_no_arg_name(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_no_arg_name)->Arg(3); ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"}, {"\"family_index\": 9,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_no_arg_name/3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); // ========================================================================= // // ------------------------ Testing Arg Name Output ------------------------ // // ========================================================================= // void BM_arg_name(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3); ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"}, {"\"family_index\": 10,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_arg_name/first:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}}); // ========================================================================= // // ------------------------ Testing Arg Names Output ----------------------- // // ========================================================================= // void 
BM_arg_names(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_arg_names)->Args({2, 5, 4})->ArgNames({"first", "", "third"}); ADD_CASES(TC_ConsoleOut, {{"^BM_arg_names/first:2/5/third:4 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"}, {"\"family_index\": 11,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_arg_names/first:2/5/third:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}}); // ========================================================================= // // ------------------------ Testing Name Output ---------------------------- // // ========================================================================= // void BM_name(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_name)->Name("BM_custom_name"); ADD_CASES(TC_ConsoleOut, {{"^BM_custom_name %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_custom_name\",$"}, {"\"family_index\": 12,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_custom_name\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\"$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_custom_name\",%csv_report$"}}); // ========================================================================= // // ------------------------ Testing Big Args Output ------------------------ // // ========================================================================= // void BM_BigArgs(benchmark::State& state) { for (auto _ : state) { } } 
BENCHMARK(BM_BigArgs)->RangeMultiplier(2)->Range(1U << 30U, 1U << 31U); ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"}, {"^BM_BigArgs/2147483648 %console_report$"}}); // ========================================================================= // // ----------------------- Testing Complexity Output ----------------------- // // ========================================================================= // void BM_Complexity_O1(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } state.SetComplexityN(state.range(0)); } BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); SET_SUBSTITUTIONS({{"%bigOStr", "[ ]* %float \\([0-9]+\\)"}, {"%RMS", "[ ]*[0-9]+ %"}}); ADD_CASES(TC_ConsoleOut, {{"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"}, {"^BM_Complexity_O1_RMS %RMS %RMS[ ]*$"}}); // ========================================================================= // // ----------------------- Testing Aggregate Output ------------------------ // // ========================================================================= // // Test that non-aggregate data is printed by default void BM_Repeat(benchmark::State& state) { for (auto _ : state) { } } // need two repetitions min to be able to output any aggregate output BENCHMARK(BM_Repeat)->Repetitions(2); ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:2 %console_report$"}, {"^BM_Repeat/repeats:2 %console_report$"}, {"^BM_Repeat/repeats:2_mean %console_time_only_report [ ]*2$"}, {"^BM_Repeat/repeats:2_median %console_time_only_report [ ]*2$"}, {"^BM_Repeat/repeats:2_stddev %console_time_only_report [ ]*2$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, {"\"family_index\": 15,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\"", 
MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2\",$"}, {"\"family_index\": 15,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_mean\",$"}, {"\"family_index\": 15,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_median\",$"}, {"\"family_index\": 15,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}, {"\"family_index\": 15,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"}, {"^\"BM_Repeat/repeats:2\",%csv_report$"}, {"^\"BM_Repeat/repeats:2_mean\",%csv_report$"}, {"^\"BM_Repeat/repeats:2_median\",%csv_report$"}, 
{"^\"BM_Repeat/repeats:2_stddev\",%csv_report$"}}); // but for two repetitions, mean and median is the same, so let's repeat.. BENCHMARK(BM_Repeat)->Repetitions(3); ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:3 %console_report$"}, {"^BM_Repeat/repeats:3 %console_report$"}, {"^BM_Repeat/repeats:3 %console_report$"}, {"^BM_Repeat/repeats:3_mean %console_time_only_report [ ]*3$"}, {"^BM_Repeat/repeats:3_median %console_time_only_report [ ]*3$"}, {"^BM_Repeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, {"\"family_index\": 16,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3\",$"}, {"\"family_index\": 16,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3\",$"}, {"\"family_index\": 16,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, {"\"family_index\": 16,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": 
\"BM_Repeat/repeats:3_median\",$"}, {"\"family_index\": 16,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}, {"\"family_index\": 16,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, {"^\"BM_Repeat/repeats:3\",%csv_report$"}, {"^\"BM_Repeat/repeats:3\",%csv_report$"}, {"^\"BM_Repeat/repeats:3_mean\",%csv_report$"}, {"^\"BM_Repeat/repeats:3_median\",%csv_report$"}, {"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"}}); // median differs between even/odd number of repetitions, so just to be sure BENCHMARK(BM_Repeat)->Repetitions(4); ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:4 %console_report$"}, {"^BM_Repeat/repeats:4 %console_report$"}, {"^BM_Repeat/repeats:4 %console_report$"}, {"^BM_Repeat/repeats:4 %console_report$"}, {"^BM_Repeat/repeats:4_mean %console_time_only_report [ ]*4$"}, {"^BM_Repeat/repeats:4_median %console_time_only_report [ ]*4$"}, {"^BM_Repeat/repeats:4_stddev %console_time_only_report [ ]*4$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", 
MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_mean\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_median\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}, {"\"family_index\": 17,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": 
\"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4_mean\",%csv_report$"}, {"^\"BM_Repeat/repeats:4_median\",%csv_report$"}, {"^\"BM_Repeat/repeats:4_stddev\",%csv_report$"}}); // Test that a non-repeated test still prints non-aggregate results even when // only-aggregate reports have been requested void BM_RepeatOnce(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly(); ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}, {"\"family_index\": 18,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatOnce/repeats:1\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_RepeatOnce/repeats:1\",%csv_report$"}}); // Test that non-aggregate data is not reported void BM_SummaryRepeat(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); ADD_CASES( TC_ConsoleOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"^BM_SummaryRepeat/repeats:3_mean %console_time_only_report [ ]*3$"}, {"^BM_SummaryRepeat/repeats:3_median %console_time_only_report [ ]*3$"}, {"^BM_SummaryRepeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); ADD_CASES(TC_JSONOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, 
{"\"family_index\": 19,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"}, {"\"family_index\": 19,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}, {"\"family_index\": 19,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, {"^\"BM_SummaryRepeat/repeats:3_median\",%csv_report$"}, {"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}}); // Test that non-aggregate data is not displayed. // NOTE: this test is kinda bad. we are only testing the display output. // But we don't check that the file output still contains everything... 
void BM_SummaryDisplay(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_SummaryDisplay)->Repetitions(2)->DisplayAggregatesOnly(); ADD_CASES( TC_ConsoleOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, {"^BM_SummaryDisplay/repeats:2_mean %console_time_only_report [ ]*2$"}, {"^BM_SummaryDisplay/repeats:2_median %console_time_only_report [ ]*2$"}, {"^BM_SummaryDisplay/repeats:2_stddev %console_time_only_report [ ]*2$"}}); ADD_CASES(TC_JSONOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, {"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"}, {"\"family_index\": 20,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, {"\"family_index\": 20,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, {"\"family_index\": 20,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, {"^\"BM_SummaryDisplay/repeats:2_mean\",%csv_report$"}, 
{"^\"BM_SummaryDisplay/repeats:2_median\",%csv_report$"}, {"^\"BM_SummaryDisplay/repeats:2_stddev\",%csv_report$"}}); // Test repeats with custom time unit. void BM_RepeatTimeUnit(benchmark::State& state) { for (auto _ : state) { } } BENCHMARK(BM_RepeatTimeUnit) ->Repetitions(3) ->ReportAggregatesOnly() ->Unit(benchmark::kMicrosecond); ADD_CASES( TC_ConsoleOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, {"^BM_RepeatTimeUnit/repeats:3_mean %console_us_time_only_report [ ]*3$"}, {"^BM_RepeatTimeUnit/repeats:3_median %console_us_time_only_report [ " "]*3$"}, {"^BM_RepeatTimeUnit/repeats:3_stddev %console_us_time_only_report [ " "]*3$"}}); ADD_CASES(TC_JSONOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"}, {"\"family_index\": 21,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"}, {"\"family_index\": 21,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, {"\"family_index\": 21,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", 
MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}}); ADD_CASES(TC_CSVOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, {"^\"BM_RepeatTimeUnit/repeats:3_mean\",%csv_us_report$"}, {"^\"BM_RepeatTimeUnit/repeats:3_median\",%csv_us_report$"}, {"^\"BM_RepeatTimeUnit/repeats:3_stddev\",%csv_us_report$"}}); // ========================================================================= // // -------------------- Testing user-provided statistics ------------------- // // ========================================================================= // const auto UserStatistics = [](const std::vector& v) { return v.back(); }; void BM_UserStats(benchmark::State& state) { for (auto _ : state) { state.SetIterationTime(150 / 10e8); } } // clang-format off BENCHMARK(BM_UserStats) ->Repetitions(3) ->Iterations(5) ->UseManualTime() ->ComputeStatistics("", UserStatistics); // clang-format on // check that user-provided stats is calculated, and is after the default-ones // empty string as name is intentional, it would sort before anything else ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/iterations:5/repeats:3/manual_time [ " "]* 150 ns %time [ ]*5$"}, {"^BM_UserStats/iterations:5/repeats:3/manual_time [ " "]* 150 ns %time [ ]*5$"}, {"^BM_UserStats/iterations:5/repeats:3/manual_time [ " "]* 150 ns %time [ ]*5$"}, {"^BM_UserStats/iterations:5/repeats:3/" "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, {"^BM_UserStats/iterations:5/repeats:3/" "manual_time_median [ ]* 150 ns %time [ ]*3$"}, {"^BM_UserStats/iterations:5/repeats:3/" "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, {"^BM_UserStats/iterations:5/repeats:3/manual_time_ " "[ ]* 150 ns %time [ ]*3$"}}); ADD_CASES( TC_JSONOut, {{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, 
{"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, 
{"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"}, {"\"family_index\": 22,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/" "manual_time_median\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/" "manual_time_stddev\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}}); // ========================================================================= // // ------------- Testing relative standard deviation statistics ------------ // // 
========================================================================= // const auto UserPercentStatistics = [](const std::vector&) { return 1. / 100.; }; void BM_UserPercentStats(benchmark::State& state) { for (auto _ : state) { state.SetIterationTime(150 / 10e8); } } // clang-format off BENCHMARK(BM_UserPercentStats) ->Repetitions(3) ->Iterations(5) ->UseManualTime() ->Unit(benchmark::TimeUnit::kNanosecond) ->ComputeStatistics("", UserPercentStatistics, benchmark::StatisticUnit::kPercentage); // clang-format on // check that UserPercent-provided stats is calculated, and is after the // default-ones empty string as name is intentional, it would sort before // anything else ADD_CASES(TC_ConsoleOut, {{"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " "]* 150 ns %time [ ]*5$"}, {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " "]* 150 ns %time [ ]*5$"}, {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " "]* 150 ns %time [ ]*5$"}, {"^BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, {"^BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_median [ ]* 150 ns %time [ ]*3$"}, {"^BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time_ " "[ ]* 1.00 % [ ]* 1.00 %[ ]*3$"}}); ADD_CASES( TC_JSONOut, {{"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, {"\"family_index\": 23,$", MR_Next}, 
{"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_mean\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_median\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": " 
"\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_stddev\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": " "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"\",$", MR_Next}, {"\"aggregate_unit\": \"percentage\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.(0)*e-(0)*2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_mean\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_median\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_stddev\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_\",%csv_cv_report$"}}); // ========================================================================= // // ------------------------- Testing StrEscape JSON ------------------------ // // ========================================================================= // #if 0 // enable when csv testing code correctly handles multi-line fields void 
BM_JSON_Format(benchmark::State& state) { state.SkipWithError("val\b\f\n\r\t\\\"with\"es,capes"); for (auto _ : state) { } } BENCHMARK(BM_JSON_Format); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_JSON_Format\",$"}, {"\"family_index\": 23,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_JSON_Format\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"error_occurred\": true,$", MR_Next}, {R"("error_message": "val\\b\\f\\n\\r\\t\\\\\\"with\\"es,capes",$)", MR_Next}}); #endif // ========================================================================= // // -------------------------- Testing CsvEscape ---------------------------- // // ========================================================================= // void BM_CSV_Format(benchmark::State& state) { state.SkipWithError("\"freedom\""); for (auto _ : state) { } } BENCHMARK(BM_CSV_Format); ADD_CASES(TC_CSVOut, {{"^\"BM_CSV_Format\",,,,,,,,true,\"\"\"freedom\"\"\"$"}}); } // end namespace // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/skip_with_error_test.cc ================================================ #undef NDEBUG #include #include #include "../src/check.h" // NOTE: check.h is for internal use only! 
#include "benchmark/benchmark_api.h"
#include "benchmark/registration.h"
#include "benchmark/reporter.h"
#include "benchmark/state.h"
#include "benchmark/utils.h"

namespace {

// Console reporter that additionally keeps a copy of every reported run so
// that main() can compare them against ExpectedResults afterwards.
class TestReporter : public benchmark::ConsoleReporter {
 public:
  TestReporter() = default;
  ~TestReporter() override = default;

  bool ReportContext(const Context& context) override {
    return ConsoleReporter::ReportContext(context);
  }

  // Appends the runs to all_runs_ and then forwards them to the console
  // reporter.
  void ReportRuns(const std::vector<Run>& report) override {
    all_runs_.insert(all_runs_.end(), begin(report), end(report));
    ConsoleReporter::ReportRuns(report);
  }

  // Every run seen so far, in reporting order.
  mutable std::vector<Run> all_runs_;
};

// One expected run: its full benchmark name, whether it must have been
// skipped with an error, and the exact skip message.
// NOTE: must stay an aggregate, ADD_CASES brace-initializes it.
struct TestCase {
  std::string name;
  bool error_occurred;
  std::string error_message;

  using Run = benchmark::BenchmarkReporter::Run;

  // Checks `run` against this expectation; any mismatch trips a BM_CHECK.
  void CheckRun(Run const& run) const {
    BM_CHECK(name == run.benchmark_name())
        << "expected " << name << " got " << run.benchmark_name();
    BM_CHECK_EQ(error_occurred,
                benchmark::internal::SkippedWithError == run.skipped);
    BM_CHECK(error_message == run.skip_message);
    // Only successful runs are required to have iterated; the matching
    // `run.iterations == 0` check for errored runs is left disabled.
    if (!error_occurred) {
      BM_CHECK(run.iterations != 0);
    }
  }
};

// Expectations in registration order, filled by ADD_CASES during static
// initialization and consumed by main().
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::vector<TestCase> ExpectedResults;

// Prefixes each case name in `v` with `base_name` and appends the case to
// ExpectedResults. Returns 0 so the call can initialize a dummy variable.
int AddCases(const std::string& base_name,
             std::initializer_list<TestCase> const& v) {
  for (auto TC : v) {
    TC.name = base_name + TC.name;
    ExpectedResults.push_back(std::move(TC));
  }
  return 0;
}

#define CONCAT(x, y) CONCAT2(x, y)
#define CONCAT2(x, y) x##y
// Runs AddCases() at static-initialization time. The variable name embeds
// __LINE__, so each use must sit on its own source line.
#define ADD_CASES(...) \
  const int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)

// Skips without ever entering a measurement loop.
void BM_error_no_running(benchmark::State& state) {
  state.SkipWithError("error message");
}
BENCHMARK(BM_error_no_running);
ADD_CASES("BM_error_no_running", {{"", true, "error message"}});

// Skips first; the KeepRunning() loop body must then never execute.
void BM_error_before_running(benchmark::State& state) {
  state.SkipWithError("error message");
  while (state.KeepRunning()) {
    assert(false);
  }
}
BENCHMARK(BM_error_before_running);
ADD_CASES("BM_error_before_running", {{"", true, "error message"}});

// Same as above, for the KeepRunningBatch() loop form.
void BM_error_before_running_batch(benchmark::State& state) {
  state.SkipWithError("error message");
  while (state.KeepRunningBatch(17)) {
    assert(false);
  }
}
BENCHMARK(BM_error_before_running_batch);
ADD_CASES("BM_error_before_running_batch", {{"", true, "error message"}});

// Same as above, for the range-for loop form.
void BM_error_before_running_range_for(benchmark::State& state) {
  state.SkipWithError("error message");
  for (auto _ : state) {
    assert(false);
  }
}
BENCHMARK(BM_error_before_running_range_for);
ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}});

// With Arg(1), threads whose index is <= threads()/2 raise the error inside
// the loop; the assert verifies that such a thread gets no further iteration
// afterwards. With Arg(2) nothing is skipped.
void BM_error_during_running(benchmark::State& state) {
  bool first_iter = true;
  while (state.KeepRunning()) {
    if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) {
      assert(first_iter);
      first_iter = false;
      state.SkipWithError("error message");
    } else {
      state.PauseTiming();
      state.ResumeTiming();
    }
  }
}
BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8);
ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"},
                                      {"/1/threads:2", true, "error message"},
                                      {"/1/threads:4", true, "error message"},
                                      {"/1/threads:8", true, "error message"},
                                      {"/2/threads:1", false, ""},
                                      {"/2/threads:2", false, ""},
                                      {"/2/threads:4", false, ""},
                                      {"/2/threads:8", false, ""}});

// Raises the error from inside an explicitly written iterator loop.
void BM_error_during_running_ranged_for(benchmark::State& state) {
  assert(state.max_iterations > 3 && "test requires at least a few iterations");
  bool first_iter = true;
  // NOTE: Users should not write the for loop explicitly.
  for (auto It = state.begin(), End = state.end(); It != End; ++It) {
    if (state.range(0) == 1) {
      assert(first_iter);
      first_iter = false;
      (void)first_iter;
      state.SkipWithError("error message");
      // Test the unfortunate but documented behavior that the ranged-for loop
      // doesn't automatically terminate when SkipWithError is set.
      // The increment lives inside the assert, so it is only evaluated while
      // assertions are enabled.
      assert(++It != End);
      break;  // Required behavior
    }
  }
}
BENCHMARK(BM_error_during_running_ranged_for)->Arg(1)->Arg(2)->Iterations(5);
ADD_CASES("BM_error_during_running_ranged_for",
          {{"/1/iterations:5", true, "error message"},
           {"/2/iterations:5", false, ""}});

// Completes the whole measurement loop and only then reports the error (from
// the threads whose index is <= threads()/2).
void BM_error_after_running(benchmark::State& state) {
  for (auto _ : state) {
    // NOTE(review): the cast target type is not visible in this view of the
    // file; double is assumed - confirm.
    auto iterations = static_cast<double>(state.iterations()) *
                      static_cast<double>(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  if (state.thread_index() <= (state.threads() / 2)) {
    state.SkipWithError("error message");
  }
}
BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"},
                                     {"/threads:2", true, "error message"},
                                     {"/threads:4", true, "error message"},
                                     {"/threads:8", true, "error message"}});

// Like BM_error_during_running, but the error is raised while timing is
// paused.
void BM_error_while_paused(benchmark::State& state) {
  bool first_iter = true;
  while (state.KeepRunning()) {
    if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) {
      assert(first_iter);
      first_iter = false;
      state.PauseTiming();
      state.SkipWithError("error message");
    } else {
      state.PauseTiming();
      state.ResumeTiming();
    }
  }
}
BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8);
ADD_CASES("BM_error_while_paused", {{"/1/threads:1", true, "error message"},
                                    {"/1/threads:2", true, "error message"},
                                    {"/1/threads:4", true, "error message"},
                                    {"/1/threads:8", true, "error message"},
                                    {"/2/threads:1", false, ""},
                                    {"/2/threads:2", false, ""},
                                    {"/2/threads:4", false, ""},
                                    {"/2/threads:8", false, ""}});

// A benchmark that neither iterates nor skips; it is expected to be reported
// as an error with the message listed below.
void BM_malformed(benchmark::State& /*unused*/) {
  // NOTE: empty body wanted. Nothing else.
}
BENCHMARK(BM_malformed);
ADD_CASES("BM_malformed",
          {{"", true,
            "The benchmark didn't run, nor was it explicitly skipped. Please "
            "call 'SkipWithXXX` in your benchmark as appropriate."}});

}  // end namespace

// Runs every registered benchmark through TestReporter and verifies that the
// reported runs match ExpectedResults one-to-one and in order.
int main(int argc, char* argv[]) {
  benchmark::MaybeReenterWithoutASLR(argc, argv);
  benchmark::Initialize(&argc, argv);

  TestReporter test_reporter;
  benchmark::RunSpecifiedBenchmarks(&test_reporter);

  using Run = benchmark::BenchmarkReporter::Run;
  auto EB = ExpectedResults.begin();

  for (Run const& run : test_reporter.all_runs_) {
    // More runs than expectations is a failure.
    assert(EB != ExpectedResults.end());
    EB->CheckRun(run);
    ++EB;
  }
  // Fewer runs than expectations is a failure as well.
  assert(EB == ExpectedResults.end());

  return 0;
}
================================================
FILE: test/spec_arg_test.cc
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include "benchmark/benchmark_api.h"
#include "benchmark/registration.h"
#include "benchmark/reporter.h"
#include "benchmark/state.h"

// Tests that we can override benchmark-spec value from FLAGS_benchmark_filter
// with argument to RunSpecifiedBenchmarks(...).
namespace {

// Console reporter that additionally records the function name of every
// benchmark it is asked to report, so main() can see which benchmarks ran.
class TestReporter : public benchmark::ConsoleReporter {
 public:
  // Forwards unchanged to the console reporter.
  bool ReportContext(const Context& context) override {
    return ConsoleReporter::ReportContext(context);
  };

  // Remembers the function name of the (single) reported run, then forwards
  // the report to the console reporter.
  void ReportRuns(const std::vector& report) override {
    // This test expects exactly one run per report.
    assert(report.size() == 1);
    matched_functions.push_back(report[0].run_name.function_name);
    ConsoleReporter::ReportRuns(report);
  };

  TestReporter() {}

  ~TestReporter() override {}

  // Returns the recorded function names, in the order they were reported.
  const std::vector& GetMatchedFunctions() const {
    return matched_functions;
  }

 private:
  // Function names collected by ReportRuns().
  std::vector matched_functions;
};

// Named by the --benchmark_filter flag but must never execute, because main()
// passes a different spec to RunSpecifiedBenchmarks().
void BM_NotChosen(benchmark::State& state) {
  assert(false && "SHOULD NOT BE CALLED");
  for (auto _ : state) {
  }
}
BENCHMARK(BM_NotChosen);

// The benchmark selected through the explicit spec argument; empty loop.
void BM_Chosen(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Chosen);

}  // end namespace

// Checks three things, returning a distinct non-zero code per failure:
//   1. GetBenchmarkFilter() reports the --benchmark_filter flag value;
//   2. a spec passed to RunSpecifiedBenchmarks() overrides that flag, so only
//      BM_Chosen runs;
//   3. SetBenchmarkFilter() changes what GetBenchmarkFilter() returns.
//
// NOTE(review): several checks below use assert(), and no "#undef NDEBUG" is
// visible at the top of this file. If the build defines NDEBUG they disappear
// and matched_functions.front() is reached without a size check -- confirm
// the test build never defines NDEBUG.
int main(int argc, char** argv) {
  benchmark::MaybeReenterWithoutASLR(argc, argv);

  const std::string flag = "BM_NotChosen";

  // Verify that argv specifies --benchmark_filter=BM_NotChosen.
  bool found = false;
  for (int i = 0; i < argc; ++i) {
    if (strcmp("--benchmark_filter=BM_NotChosen", argv[i]) == 0) {
      found = true;
      break;
    }
  }
  assert(found);

  benchmark::Initialize(&argc, argv);

  // Check that the current flag value is reported accurately via the
  // GetBenchmarkFilter() function.
  if (flag != benchmark::GetBenchmarkFilter()) {
    std::cerr
        << "Seeing different value for flags. GetBenchmarkFilter() returns ["
        << benchmark::GetBenchmarkFilter() << "] expected flag=[" << flag
        << "]\n";
    return 1;
  }
  TestReporter test_reporter;
  // The explicit spec must take precedence over the flag value.
  const char* const spec = "BM_Chosen";
  const size_t returned_count =
      benchmark::RunSpecifiedBenchmarks(&test_reporter, spec);
  assert(returned_count == 1);
  const std::vector matched_functions =
      test_reporter.GetMatchedFunctions();
  assert(matched_functions.size() == 1);
  if (strcmp(spec, matched_functions.front().c_str()) != 0) {
    std::cerr << "Expected benchmark [" << spec << "] to run, but got ["
              << matched_functions.front() << "]\n";
    return 2;
  }

  // Test that SetBenchmarkFilter works.
  const std::string golden_value = "golden_value";
  benchmark::SetBenchmarkFilter(golden_value);
  std::string current_value = benchmark::GetBenchmarkFilter();
  if (golden_value != current_value) {
    std::cerr << "Expected [" << golden_value
              << "] for --benchmark_filter but got [" << current_value
              << "]\n";
    return 3;
  }
  return 0;
}

================================================
FILE: test/spec_arg_verbosity_test.cc
================================================
// NOTE(review): the header names of the bare #include directives below are
// missing in this copy of the file -- restore them from the upstream source.
#include
#include

#include "benchmark/benchmark_api.h"
#include "benchmark/registration.h"
#include "benchmark/state.h"

namespace {

// Tests that the user-specified verbosity level can be retrieved.
void BM_Verbosity(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Verbosity);

}  // end namespace

// Requires "--v=42" on the command line and verifies that, after
// Initialize(), GetBenchmarkVerbosity() returns 42. Returns 1 on either
// failure and 0 on success.
int main(int argc, char** argv) {
  benchmark::MaybeReenterWithoutASLR(argc, argv);

  const int32_t flagv = 42;

  // Verify that argv specifies --v=42.
  bool found = false;
  for (int i = 0; i < argc; ++i) {
    if (strcmp("--v=42", argv[i]) == 0) {
      found = true;
      break;
    }
  }
  if (!found) {
    std::cerr << "This test requires '--v=42' to be passed as a command-line "
              << "argument.\n";
    return 1;
  }

  benchmark::Initialize(&argc, argv);

  // Check that the current flag value is reported accurately via the
  // GetBenchmarkVerbosity() function.
  if (flagv != benchmark::GetBenchmarkVerbosity()) {
    std::cerr
        << "Seeing different value for flags. GetBenchmarkVerbosity() returns ["
        << benchmark::GetBenchmarkVerbosity() << "] expected flag=[" << flagv
        << "]\n";
    return 1;
  }
  return 0;
}

================================================
FILE: test/state_assembly_test.cc
================================================
#include "benchmark/state.h"
#include "benchmark/utils.h"

#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#endif

// Symbols that are only declared here; no definition is visible in this file.
// clang-format off
extern "C" {
  extern int ExternInt;
  benchmark::State& GetState();
  void Fn();
}
// clang-format on

using benchmark::State;

// Range-for form of the benchmark loop over the State returned by GetState();
// returns 101. The directive comments inside the body describe the expected
// generated assembly and must be kept verbatim.
// NOTE(review): presumably these directives are consumed by LLVM FileCheck
// through test/AssemblyTests.cmake -- confirm before editing any of them.
// CHECK-LABEL: test_for_auto_loop:
extern "C" int test_for_auto_loop() {
  State& S = GetState();
  int x = 42;
  // CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv
  // CHECK-NEXT: testq %rbx, %rbx
  // CHECK-NEXT: je [[LOOP_END:.*]]

  for (auto _ : S) {
    // CHECK: .L[[LOOP_HEAD:[a-zA-Z0-9_]+]]:
    // CHECK-GNU-NEXT: subq $1, %rbx
    // CHECK-CLANG-NEXT: {{(addq \$1, %rax|incq %rax|addq \$-1, %rbx)}}
    // CHECK-NEXT: jne .L[[LOOP_HEAD]]
    benchmark::DoNotOptimize(x);
  }
  // CHECK: [[LOOP_END]]:
  // CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv

  // CHECK: movl $101, %eax
  // CHECK: ret
  return 101;
}

// Same as above but using the explicit while (S.KeepRunning()) loop form;
// returns 101. The directive comments must be kept verbatim.
// CHECK-LABEL: test_while_loop:
extern "C" int test_while_loop() {
  State& S = GetState();
  int x = 42;

  // CHECK: j{{(e|mp)}} .L[[LOOP_HEADER:[a-zA-Z0-9_]+]]
  // CHECK-NEXT: .L[[LOOP_BODY:[a-zA-Z0-9_]+]]:
  while (S.KeepRunning()) {
    // CHECK-GNU-NEXT: subq $1, %[[IREG:[a-z]+]]
    // CHECK-CLANG-NEXT: {{(addq \$-1,|decq)}} %[[IREG:[a-z]+]]
    // CHECK: movq %[[IREG]], [[DEST:.*]]
    benchmark::DoNotOptimize(x);
  }
  // CHECK-DAG: movq [[DEST]], %[[IREG]]
  // CHECK-DAG: testq %[[IREG]], %[[IREG]]
  // CHECK-DAG: jne .L[[LOOP_BODY]]
  // CHECK-DAG: .L[[LOOP_HEADER]]:

  // CHECK: cmpb $0
  // CHECK-NEXT: jne .L[[LOOP_END:[a-zA-Z0-9_]+]]
  // CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv

  // CHECK: .L[[LOOP_END]]:
  // CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv

  // CHECK: movl $101, %eax
  // CHECK: ret
  return 101;
}

================================================
FILE: test/statistics_gtest.cc
================================================
//===---------------------------------------------------------------------===//
// statistics_test - Unit tests for src/statistics.cc
//===---------------------------------------------------------------------===//

#include "../src/statistics.h"
#include "gtest/gtest.h"

namespace {

// Arithmetic mean of constant, consecutive and uneven samples.
TEST(StatisticsTest, Mean) {
  EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({42, 42, 42, 42}), 42.0);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({1, 2, 3, 4}), 2.5);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({1, 2, 5, 10, 10, 14}), 7.0);
}

// Median for even-sized input (average of the two middle values, 2.5) and
// odd-sized input (the middle value, 5).
TEST(StatisticsTest, Median) {
  EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({42, 42, 42, 42}), 42.0);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({1, 2, 3, 4}), 2.5);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({1, 2, 5, 10, 10}), 5.0);
}

// The expected values correspond to the sample (n - 1) standard deviation:
// for {1, 2, 3} the squared deviations sum to 2 and sqrt(2 / 2) == 1.
TEST(StatisticsTest, StdDev) {
  EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({101, 101, 101, 101}), 0.0);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({1, 2, 3}), 1.0);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({2.5, 2.4, 3.3, 4.2, 5.1}),
                   1.151086443322134);
}

// Coefficient of variation; for {1, 2, 3} the expectation is 1 / 2, i.e. the
// standard deviation checked above (1) divided by the mean (2).
TEST(StatisticsTest, CV) {
  EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0);
  EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. / 2.);
  // Compared with an absolute tolerance instead of EXPECT_DOUBLE_EQ.
  ASSERT_NEAR(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}),
              0.32888184094918121, 1e-15);
}

}  // end namespace

================================================
FILE: test/string_util_gtest.cc
================================================
//===---------------------------------------------------------------------===//
// string_util_test - Unit tests for src/string_util.cc
//===---------------------------------------------------------------------===//

// NOTE(review): the header name of the bare #include directive below is
// missing in this copy of the file -- restore it from the upstream source.
#include

#include "../src/internal_macros.h"
#include "../src/string_util.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace {

// benchmark::stoul: checks the parsed value and the position written to
// `pos` for base-10 defaults, the ULONG_MAX boundary, and explicit bases
// 2, 8, 10 and 16.
TEST(StringUtilTest, stoul) {
  {
    size_t pos = 0;
    EXPECT_EQ(0UL, benchmark::stoul("0", &pos));
    EXPECT_EQ(1UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(7UL, benchmark::stoul("7", &pos));
    EXPECT_EQ(1UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(135UL, benchmark::stoul("135", &pos));
    EXPECT_EQ(3UL, pos);
  }
  // Largest representable value, for 32-bit and 64-bit unsigned long.
#if ULONG_MAX == 0xFFFFFFFFul
  {
    size_t pos = 0;
    EXPECT_EQ(0xFFFFFFFFul, benchmark::stoul("4294967295", &pos));
    EXPECT_EQ(10ul, pos);
  }
#elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul
  {
    size_t pos = 0;
    EXPECT_EQ(0xFFFFFFFFFFFFFFFFUL,
              benchmark::stoul("18446744073709551615", &pos));
    EXPECT_EQ(20UL, pos);
  }
#endif
  // The same digits "1010" interpreted in bases 2, 8, 10 and 16.
  {
    size_t pos = 0;
    EXPECT_EQ(10UL, benchmark::stoul("1010", &pos, 2));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(520UL, benchmark::stoul("1010", &pos, 8));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(1010UL, benchmark::stoul("1010", &pos, 10));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(4112UL, benchmark::stoul("1010", &pos, 16));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(0xBEEFUL, benchmark::stoul("BEEF", &pos, 16));
    EXPECT_EQ(4UL, pos);
  }
  // Non-numeric input must throw; only checked when exceptions are enabled.
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
  {
    ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"),
                 std::invalid_argument);
  }
#endif
}

// benchmark::stoi: same structure as the stoul test, plus a negative value.
TEST(StringUtilTest, stoi) {
  {
    size_t pos = 0;
    EXPECT_EQ(0, benchmark::stoi("0", &pos));
    EXPECT_EQ(1UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(-17, benchmark::stoi("-17", &pos));
    EXPECT_EQ(3UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(1357, benchmark::stoi("1357", &pos));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16));
    EXPECT_EQ(4UL, pos);
  }
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
  {
    ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
                 std::invalid_argument);
  }
#endif
}

// benchmark::stod: integers, a negative value, a fraction and an exponent
// form, each with the position written to `pos`.
TEST(StringUtilTest, stod) {
  {
    size_t pos = 0;
    EXPECT_EQ(0.0, benchmark::stod("0", &pos));
    EXPECT_EQ(1UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(-84.0, benchmark::stod("-84", &pos));
    EXPECT_EQ(3UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(1234.0, benchmark::stod("1234", &pos));
    EXPECT_EQ(4UL, pos);
  }
  {
    size_t pos = 0;
    EXPECT_EQ(1.5, benchmark::stod("1.5", &pos));
    EXPECT_EQ(3UL, pos);
  }
  {
    size_t pos = 0;
    /* Note: exactly representable as double */
    EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos));
    EXPECT_EQ(8UL, pos);
  }
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
  {
    ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
                 std::invalid_argument);
  }
#endif
}

// benchmark::StrSplit on ',': empty input yields an empty vector, input
// without the delimiter yields one element, otherwise one element per field.
TEST(StringUtilTest, StrSplit) {
  EXPECT_EQ(benchmark::StrSplit("", ','), std::vector{});
  EXPECT_EQ(benchmark::StrSplit("hello", ','),
            std::vector({"hello"}));
  EXPECT_EQ(benchmark::StrSplit("hello,there,is,more", ','),
            std::vector({"hello", "there", "is", "more"}));
}

// Parameterized fixture for HumanReadableNumber. Each tuple below is
// (value, Counter::kIs1024 or Counter::kIs1000, regular expression that the
// formatted text must match).
using HumanReadableFixture = ::testing::TestWithParam<
    std::tuple>;

// NOTE(review): the pattern "1.024k" leaves its '.' unescaped, so it matches
// any character there, while the neighbouring patterns escape the decimal
// point as "\\.". Confirm whether that is intentional.
INSTANTIATE_TEST_SUITE_P(
    HumanReadableTests, HumanReadableFixture,
    ::testing::Values(
        std::make_tuple(0.0, benchmark::Counter::kIs1024, "0"),
        std::make_tuple(999.0, benchmark::Counter::kIs1024, "999"),
        std::make_tuple(1000.0, benchmark::Counter::kIs1024, "1000"),
        std::make_tuple(1024.0, benchmark::Counter::kIs1024, "1Ki"),
        std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1024,
                        "976\\.56.Ki"),
        std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1024, "1Mi"),
        std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1024,
                        "953\\.674Mi"),
        std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1024,
                        "1Gi"),
        std::make_tuple(0.0, benchmark::Counter::kIs1000, "0"),
        std::make_tuple(999.0, benchmark::Counter::kIs1000, "999"),
        std::make_tuple(1000.0, benchmark::Counter::kIs1000, "1k"),
        std::make_tuple(1024.0, benchmark::Counter::kIs1000, "1.024k"),
        std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1000, "1M"),
        std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1000,
                        "1\\.04858M"),
        std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1000,
                        "1G"),
        std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1000,
                        "1\\.07374G")));

// Formats tuple element 0 with the selector in element 1 and requires the
// result to match the regular expression in element 2.
TEST_P(HumanReadableFixture, HumanReadableNumber) {
  std::string str = benchmark::HumanReadableNumber(std::get<0>(GetParam()),
                                                   std::get<1>(GetParam()));
  ASSERT_THAT(str, ::testing::MatchesRegex(std::get<2>(GetParam())));
}

}  // end namespace

================================================
FILE: test/templated_fixture_method_test.cc
================================================
// NOTE(review): the header names of the bare #include directives below are
// missing in this copy of the file -- restore them from the upstream source.
#include
#include

#include "benchmark/benchmark_api.h"
#include "benchmark/registration.h"
#include "benchmark/state.h"

// Fixture holding one value of type T, initialised to 0, and exposing T as
// the member alias `type`.
// NOTE(review): the template parameter list after `template` is missing in
// this copy of the file.
template
class MyFixture : public ::benchmark::Fixture {
 public:
  MyFixture() : data(0) {}

  T data;

  using type = T;
};

// Method body defined once and instantiated below for int and double; adds 1
// (converted through Base::type) to `data` on every iteration.
BENCHMARK_TEMPLATE_METHOD_F(MyFixture, Foo)(benchmark::State& st) {
  for (auto _ : st) {
    this->data += typename Base::type(1);
  }
}

BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Foo, int);
BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Foo, double);

BENCHMARK_MAIN();

================================================
FILE: test/templated_fixture_test.cc
================================================
// NOTE(review): the header names of the bare #include directives below are
// missing in this copy of the file -- restore them from the upstream source.
#include
#include

#include "benchmark/benchmark_api.h"
#include "benchmark/registration.h"
#include "benchmark/state.h"

// Fixture holding one value of type T, initialised to 0.
// NOTE(review): the template parameter list after `template` is missing in
// this copy of the file.
template
class MyFixture : public ::benchmark::Fixture {
 public:
  MyFixture() : data(0) {}

  T data;
};

// Defined with the int instantiation of the fixture; increments `data` by 1
// per iteration.
BENCHMARK_TEMPLATE_F(MyFixture, Foo, int)(benchmark::State& st) {
  for (auto _ : st) {
    data += 1;
  }
}

// Defined with the double instantiation; unlike Foo it is registered by the
// separate BENCHMARK_REGISTER_F line below.
BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, Bar, double)(benchmark::State& st) {
  for (auto _ : st) {
    data += 1.0;
  }
}

BENCHMARK_REGISTER_F(MyFixture, Bar);

BENCHMARK_MAIN();

================================================
FILE: test/time_unit_gtest.cc
================================================
#include "benchmark/benchmark_api.h"
#include "benchmark/state.h"
#include "gtest/gtest.h"

namespace benchmark {
namespace internal {

namespace {

// Minimal Benchmark subclass named "dummy" whose Run() does nothing; it only
// exists so the tests can call Unit() / GetTimeUnit() on an instance.
class DummyBenchmark : public benchmark::Benchmark {
 public:
  DummyBenchmark() : Benchmark("dummy") {}
  void Run(State& /*state*/) override {}
};

// Without any configuration GetTimeUnit() reports kNanosecond.
// NOTE(review): the next test calls SetDefaultTimeUnit(kMillisecond); if that
// default is process-wide, this expectation presumably relies on the tests
// running in declaration order -- confirm.
TEST(DefaultTimeUnitTest, TimeUnitIsNotSet) {
  DummyBenchmark benchmark;
  EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
}

// SetDefaultTimeUnit() changes what an already constructed benchmark with no
// explicit unit reports.
TEST(DefaultTimeUnitTest, DefaultIsSet) {
  DummyBenchmark benchmark;
  EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
  SetDefaultTimeUnit(kMillisecond);
  EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
}

// A unit set explicitly through Unit() wins over a default set afterwards.
TEST(DefaultTimeUnitTest, DefaultAndExplicitUnitIsSet) {
  DummyBenchmark benchmark;
  benchmark.Unit(kMillisecond);
  SetDefaultTimeUnit(kMicrosecond);

  EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
}

}  // namespace
}  // namespace internal
}  // namespace benchmark

================================================
FILE: test/user_counters_tabular_test.cc
================================================

#undef NDEBUG

#include "benchmark/benchmark_api.h"
#include "benchmark/counter.h"
#include "benchmark/registration.h"
#include "benchmark/state.h"
#include "benchmark/utils.h"
#include "output_test.h"

namespace {

// @todo: this checks the full output at once; the rule for
// CounterSet1 was failing because it was not matching "^[-]+$".
// @todo: check that the counters are vertically aligned.
// Expected console output for the whole test, one pattern per output line.
// Every entry except the last is flagged MR_Next. The table is split into
// three sections, each introduced by a dashed rule, a column header and
// another dashed rule:
//   1. all six counters (BM_Counters_Tabular and BM_CounterRates_Tabular);
//   2. Bar/Baz/Foo only (BM_CounterSet0_Tabular and BM_CounterSet1_Tabular);
//   3. Bat/Baz/Foo only (BM_CounterSet2_Tabular).
ADD_CASES(TC_ConsoleOut,
          {
    // keeping these lines long improves readability, so:
    // clang-format off
    {"^[-]+$", MR_Next},
    {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next},
    {"^[-]+$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:1_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_Counters_Tabular/repeats:2/threads:2_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next},
    {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
    {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
    {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
    {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
    {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
    {"^[-]+$", MR_Next},
    {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Baz %s Foo$", MR_Next},
    {"^[-]+$", MR_Next},
    {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^[-]+$", MR_Next},
    {"^Benchmark %s Time %s CPU %s Iterations %s Bat %s Baz %s Foo$", MR_Next},
    {"^[-]+$", MR_Next},
    {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
    {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"},
    // clang-format on
});
// The CSV header lists all six counter columns, in this order.
ADD_CASES(TC_CSVOut, {{"%csv_header,"
                       "\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}});

// ========================================================================= //
// ------------------------- Tabular Counters Output ----------------------- //
// ========================================================================= //

// Sets all six counters (Foo=1, Bar=2, Baz=4, Bat=8, Frob=16, Lob=32), each
// with the kAvgThreads flag. Registered for ThreadRange(1, 2) with two
// repetitions.
void BM_Counters_Tabular(benchmark::State& state) {
  for (auto _ : state) {
    // This test requires a non-zero CPU time to avoid divide-by-zero
    auto iterations = static_cast(state.iterations()) *
                      static_cast(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  namespace bm = benchmark;
  state.counters.insert({
      {"Foo", {1, bm::Counter::kAvgThreads}},
      {"Bar", {2, bm::Counter::kAvgThreads}},
      {"Baz", {4, bm::Counter::kAvgThreads}},
      {"Bat", {8, bm::Counter::kAvgThreads}},
      {"Frob", {16, bm::Counter::kAvgThreads}},
      {"Lob", {32, bm::Counter::kAvgThreads}},
  });
}
BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 2)->Repetitions(2);

// JSON expectations for BM_Counters_Tabular. In each case only the first
// entry is a free match; all following entries are flagged MR_Next.
// threads:1, repetition 0.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
            MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
// threads:1, repetition 1.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
            MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"repetition_index\": 1,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
// threads:1 aggregates: mean, median, stddev, cv.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_mean\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"aggregate_name\": \"mean\",$", MR_Next},
           {"\"aggregate_unit\": \"time\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_median\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"aggregate_name\": \"median\",$", MR_Next},
           {"\"aggregate_unit\": \"time\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_stddev\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"aggregate_name\": \"stddev\",$", MR_Next},
           {"\"aggregate_unit\": \"time\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
// The cv aggregate is the only one whose aggregate_unit is "percentage".
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_cv\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"aggregate_name\": \"cv\",$", MR_Next},
           {"\"aggregate_unit\": \"percentage\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});

// threads:2 (per_family_instance_index 1), repetition 0.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 1,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
            MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 2,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
// threads:2, repetition 1.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 1,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
            MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"repetition_index\": 1,$", MR_Next},
           {"\"threads\": 2,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
// NOTE(review): there is no JSON case for ".../threads:2_mean" here, although
// threads:1 has one and the console and CSV expectations both cover
// threads:2_mean. Confirm whether the omission is intentional.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_median\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 1,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 2,$", MR_Next},
           {"\"aggregate_name\": \"median\",$", MR_Next},
           {"\"aggregate_unit\": \"time\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_stddev\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 1,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 2,$", MR_Next},
           {"\"aggregate_name\": \"stddev\",$", MR_Next},
           {"\"aggregate_unit\": \"time\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_cv\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 1,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
            MR_Next},
           {"\"run_type\": \"aggregate\",$", MR_Next},
           {"\"repetitions\": 2,$", MR_Next},
           {"\"threads\": 2,$", MR_Next},
           {"\"aggregate_name\": \"cv\",$", MR_Next},
           {"\"aggregate_unit\": \"percentage\",$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});

// CSV expectations for BM_Counters_Tabular: two iteration rows followed by
// the mean/median/stddev/cv aggregates, first for threads:1 and then for
// threads:2. Every row carries six counter values.
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:1_mean\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:1_median\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_cv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:2_mean\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:2_median\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_cv_report,"
            "%float,%float,%float,%float,%float,%float$"}});
// Checks that each counter reads back exactly the value set in
// BM_Counters_Tabular. Registered below for both thread counts.
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckTabular(Results const& e) {
  CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 1);
  CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 2);
  CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 4);
  CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 8);
  CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16);
  CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32);
}
CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:1$",
                        &CheckTabular);
CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$",
                        &CheckTabular);

// ========================================================================= //
// -------------------- Tabular+Rate Counters Output ----------------------- //
// ========================================================================= //

// Same six counters as BM_Counters_Tabular but flagged kAvgThreadsRate.
// Registered for ThreadRange(1, 16), without repetitions.
void BM_CounterRates_Tabular(benchmark::State& state) {
  for (auto _ : state) {
    // This test requires a non-zero CPU time to avoid divide-by-zero
    auto iterations = static_cast(state.iterations()) *
                      static_cast(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  namespace bm = benchmark;
  state.counters.insert({
      {"Foo", {1, bm::Counter::kAvgThreadsRate}},
      {"Bar", {2, bm::Counter::kAvgThreadsRate}},
      {"Baz", {4, bm::Counter::kAvgThreadsRate}},
      {"Bat", {8, bm::Counter::kAvgThreadsRate}},
      {"Frob", {16, bm::Counter::kAvgThreadsRate}},
      {"Lob", {32, bm::Counter::kAvgThreadsRate}},
  });
}
BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16);
// The name pattern accepts any thread count, but the following entries pin
// "threads": 1.
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"},
           {"\"family_index\": 1,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_CounterRates_Tabular/threads:%int\",$",
            MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Bat\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float,$", MR_Next},
           {"\"Frob\": %float,$", MR_Next},
           {"\"Lob\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_CounterRates_Tabular/threads:%int\",%csv_report,"
                       "%float,%float,%float,%float,%float,%float$"}});
// Checks every rate counter against value / t, where t is the CPU duration
// divided by the number of threads, with 0.001 passed as the tolerance.
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckTabularRate(Results const& e) {
  double t = e.DurationCPUTime() / e.NumThreads();
  CHECK_FLOAT_COUNTER_VALUE(e, "Foo", EQ, 1. / t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "Bar", EQ, 2. / t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "Baz", EQ, 4. / t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "Bat", EQ, 8. / t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "Frob", EQ, 16. / t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "Lob", EQ, 32. / t, 0.001);
}
CHECK_BENCHMARK_RESULTS("BM_CounterRates_Tabular/threads:%int",
                        &CheckTabularRate);

// ========================================================================= //
// ------------------------- Tabular Counters Output ----------------------- //
// ========================================================================= //

// set only some of the counters
// Empty timing loop; sets Foo=10, Bar=20, Baz=40 with the kAvgThreads flag
// and leaves Bat, Frob and Lob unset.
void BM_CounterSet0_Tabular(benchmark::State& state) {
  for (auto _ : state) {
  }
  namespace bm = benchmark;
  state.counters.insert({
      {"Foo", {10, bm::Counter::kAvgThreads}},
      {"Bar", {20, bm::Counter::kAvgThreads}},
      {"Baz", {40, bm::Counter::kAvgThreads}},
  });
}
BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16);
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"},
           {"\"family_index\": 2,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_CounterSet0_Tabular/threads:%int\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"Bar\": %float,$", MR_Next},
           {"\"Baz\": %float,$", MR_Next},
           {"\"Foo\": %float$", MR_Next},
           {"}", MR_Next}});
// The CSV row keeps all six counter columns of the header; the fields for the
// unset counters (Bat, Frob, Lob) are expected to be empty.
ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet0_Tabular/threads:%int\",%csv_report,"
                       "%float,,%float,%float,,"}});
// Checks that the three counters read back the values set above.
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckSet0(Results const& e) {
  CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10);
  CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 20);
  CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40);
}
CHECK_BENCHMARK_RESULTS("BM_CounterSet0_Tabular", &CheckSet0);

// again.
void BM_CounterSet1_Tabular(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters.insert({ {"Foo", {15, bm::Counter::kAvgThreads}}, {"Bar", {25, bm::Counter::kAvgThreads}}, {"Baz", {45, bm::Counter::kAvgThreads}}, }); } BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"}, {"\"family_index\": 3,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet1_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"Bar\": %float,$", MR_Next}, {"\"Baz\": %float,$", MR_Next}, {"\"Foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet1_Tabular/threads:%int\",%csv_report," "%float,,%float,%float,,"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckSet1(Results const& e) { CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 15); CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 25); CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 45); } CHECK_BENCHMARK_RESULTS("BM_CounterSet1_Tabular/threads:%int", &CheckSet1); // ========================================================================= // // ------------------------- Tabular Counters Output ----------------------- // // ========================================================================= // // set only some of the counters, different set now. 
void BM_CounterSet2_Tabular(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters.insert({ {"Foo", {10, bm::Counter::kAvgThreads}}, {"Bat", {30, bm::Counter::kAvgThreads}}, {"Baz", {40, bm::Counter::kAvgThreads}}, }); } BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"}, {"\"family_index\": 4,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet2_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"Bat\": %float,$", MR_Next}, {"\"Baz\": %float,$", MR_Next}, {"\"Foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet2_Tabular/threads:%int\",%csv_report," ",%float,%float,%float,,"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckSet2(Results const& e) { CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10); CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 30); CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40); } CHECK_BENCHMARK_RESULTS("BM_CounterSet2_Tabular", &CheckSet2); } // end namespace // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/user_counters_test.cc ================================================ #undef NDEBUG #include "benchmark/benchmark_api.h" #include "benchmark/counter.h" 
#include "benchmark/registration.h"
#include "benchmark/state.h"
#include "benchmark/utils.h"
#include "output_test.h"

// ========================================================================= //
// ---------------------- Testing Prologue Output -------------------------- //
// ========================================================================= //

// clang-format off

ADD_CASES(TC_ConsoleOut,
          {{"^[-]+$", MR_Next},
           {"^Benchmark %s Time %s CPU %s Iterations UserCounters...$", MR_Next},
           {"^[-]+$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"%csv_header,\"bar\",\"foo\""}});

// clang-format on

// ========================================================================= //
// ------------------------- Simple Counters Output ------------------------ //
// ========================================================================= //

namespace {
// Plain counters without flags: "foo" is a constant, "bar" is twice the
// iteration count (CheckSimple below verifies both).
void BM_Counters_Simple(benchmark::State& state) {
  for (auto _ : state) {
  }
  state.counters["foo"] = 1;
  // NOTE(review): the cast target type was missing in this copy of the file;
  // double is assumed because counter values are floating point -- confirm.
  state.counters["bar"] = 2 * static_cast<double>(state.iterations());
}
BENCHMARK(BM_Counters_Simple);
ADD_CASES(TC_ConsoleOut,
          {{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Simple\",$"},
           {"\"family_index\": 0,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Simple\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"bar\": %float,$", MR_Next},
           {"\"foo\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Simple\",%csv_report,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckSimple(Results const& e) {
  double its = e.NumIterations();
  CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1);
  // check that the value of bar is within 0.1% of the expected value
  CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. * its, 0.001);
}
CHECK_BENCHMARK_RESULTS("BM_Counters_Simple", &CheckSimple);
}  // end namespace

// ========================================================================= //
// --------------------- Counters+Items+Bytes/s Output --------------------- //
// ========================================================================= //

namespace {
// Incremented on every invocation of the benchmark function below; the
// checker compares the reported "bar" counter against its final value.
int num_calls1 = 0;
// User counters combined with SetBytesProcessed/SetItemsProcessed; the
// checker expects bytes_per_second and items_per_second to be the processed
// totals divided by CPU time.
void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) {
  for (auto _ : state) {
    // This test requires a non-zero CPU time to avoid divide-by-zero
    auto iterations = static_cast<double>(state.iterations()) *
                      static_cast<double>(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  state.counters["foo"] = 1;
  state.counters["bar"] = ++num_calls1;
  state.SetBytesProcessed(364);
  state.SetItemsProcessed(150);
}
BENCHMARK(BM_Counters_WithBytesAndItemsPSec);
ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec %console_report "
                           "bar=%hrfloat bytes_per_second=%hrfloat/s "
                           "foo=%hrfloat items_per_second=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"},
           {"\"family_index\": 1,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_WithBytesAndItemsPSec\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"bar\": %float,$", MR_Next},
           {"\"bytes_per_second\": %float,$", MR_Next},
           {"\"foo\": %float,$", MR_Next},
           {"\"items_per_second\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_WithBytesAndItemsPSec\","
                       "%csv_bytes_items_report,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckBytesAndItemsPSec(Results const& e) {
  double t = e.DurationCPUTime();  // this (and not real time) is the time used
  CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1);
  CHECK_COUNTER_VALUE(e, int, "bar", EQ, num_calls1);
  // check that the values are within 0.1% of the expected values
  CHECK_FLOAT_RESULT_VALUE(e, "bytes_per_second", EQ, 364. / t, 0.001);
  CHECK_FLOAT_RESULT_VALUE(e, "items_per_second", EQ, 150. / t, 0.001);
}
CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec",
                        &CheckBytesAndItemsPSec);
}  // end namespace

// ========================================================================= //
// ------------------------- Rate Counters Output -------------------------- //
// ========================================================================= //

namespace {
// kIsRate counters: CheckRate expects each value divided by the CPU time.
void BM_Counters_Rate(benchmark::State& state) {
  for (auto _ : state) {
    // This test requires a non-zero CPU time to avoid divide-by-zero
    auto iterations = static_cast<double>(state.iterations()) *
                      static_cast<double>(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  namespace bm = benchmark;
  state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate};
  state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate};
}
BENCHMARK(BM_Counters_Rate);
ADD_CASES(
    TC_ConsoleOut,
    {{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Rate\",$"},
           {"\"family_index\": 2,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Rate\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"bar\": %float,$", MR_Next},
           {"\"foo\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Rate\",%csv_report,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckRate(Results const& e) {
  double t = e.DurationCPUTime();  // this (and not real time) is the time used
  // check that the values are within 0.1% of the expected values
  CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / t, 0.001);
}
CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate);
}  // end namespace

// ========================================================================= //
// ----------------------- Inverted Counters Output ------------------------ //
// ========================================================================= //

namespace {
// kInvert counters: CheckInvert expects the reciprocal of each value
// (0.0001 -> 10000 and 10000 -> 0.0001).
void BM_Invert(benchmark::State& state) {
  for (auto _ : state) {
    // This test requires a non-zero CPU time to avoid divide-by-zero
    auto iterations = static_cast<double>(state.iterations()) *
                      static_cast<double>(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  namespace bm = benchmark;
  state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert};
  state.counters["bar"] = bm::Counter{10000, bm::Counter::kInvert};
}
BENCHMARK(BM_Invert);
ADD_CASES(TC_ConsoleOut,
          {{"^BM_Invert %console_report bar=%hrfloatu foo=%hrfloatk$"}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Invert\",$"},
           {"\"family_index\": 3,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Invert\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"bar\": %float,$", MR_Next},
           {"\"foo\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Invert\",%csv_report,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckInvert(Results const& e) {
  CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 10000, 0.0001);
  CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 0.0001, 0.0001);
}
CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert);
}  // end namespace

// ========================================================================= //
// --------------------- InvertedRate Counters Output ---------------------- //
// ========================================================================= //

namespace {
// kIsRate | kInvert counters: CheckInvertedRate expects CPU time divided by
// the counter value.
void BM_Counters_InvertedRate(benchmark::State& state) {
  for (auto _ : state) {
    // This test requires a non-zero CPU time to avoid divide-by-zero
    auto iterations = static_cast<double>(state.iterations()) *
                      static_cast<double>(state.iterations());
    benchmark::DoNotOptimize(iterations);
  }
  namespace bm = benchmark;
  state.counters["foo"] =
      bm::Counter{1, bm::Counter::kIsRate | bm::Counter::kInvert};
  state.counters["bar"] =
      bm::Counter{8192, bm::Counter::kIsRate | bm::Counter::kInvert};
}
BENCHMARK(BM_Counters_InvertedRate);
ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate %console_report "
                           "bar=%hrfloats foo=%hrfloats$"}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_InvertedRate\",$"},
           {"\"family_index\": 4,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_InvertedRate\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"bar\": %float,$", MR_Next},
           {"\"foo\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(TC_CSVOut,
          {{"^\"BM_Counters_InvertedRate\",%csv_report,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckInvertedRate(Results const& e) {
  double t = e.DurationCPUTime();  // this (and not real time) is the time used
  // check that the values are within 0.1% of the expected values
  CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, t, 0.001);
  CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, t / 8192.0, 0.001);
}
CHECK_BENCHMARK_RESULTS("BM_Counters_InvertedRate", &CheckInvertedRate);
}  // end namespace

// ========================================================================= //
// ------------------------- Thread Counters Output ------------------------ //
// ========================================================================= //

namespace {
// Plain counters run with 1..8 threads: CheckThreads expects each reported
// value to be the per-thread value multiplied by the number of threads.
void BM_Counters_Threads(benchmark::State& state) {
  for (auto _ : state) {
  }
  state.counters["foo"] = 1;
  state.counters["bar"] = 2;
}
BENCHMARK(BM_Counters_Threads)->ThreadRange(1, 8);
ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report "
                           "bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut,
          {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"},
           {"\"family_index\": 5,$", MR_Next},
           {"\"per_family_instance_index\": 0,$", MR_Next},
           {"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next},
           {"\"run_type\": \"iteration\",$", MR_Next},
           {"\"repetitions\": 1,$", MR_Next},
           {"\"repetition_index\": 0,$", MR_Next},
           {"\"threads\": 1,$", MR_Next},
           {"\"iterations\": %int,$", MR_Next},
           {"\"real_time\": %float,$", MR_Next},
           {"\"cpu_time\": %float,$", MR_Next},
           {"\"time_unit\": \"ns\",$", MR_Next},
           {"\"bar\": %float,$", MR_Next},
           {"\"foo\": %float$", MR_Next},
           {"}", MR_Next}});
ADD_CASES(
    TC_CSVOut,
    {{"^\"BM_Counters_Threads/threads:%int\",%csv_report,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckThreads(Results const& e) {
  CHECK_COUNTER_VALUE(e, int, "foo", EQ, e.NumThreads());
  CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2 * e.NumThreads());
}
CHECK_BENCHMARK_RESULTS("BM_Counters_Threads/threads:%int", &CheckThreads);
}  // end namespace

//
========================================================================= // // ---------------------- ThreadAvg Counters Output ------------------------ // // ========================================================================= // namespace { void BM_Counters_AvgThreads(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreads}; state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreads}; } BENCHMARK(BM_Counters_AvgThreads)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int " "%console_report bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"}, {"\"family_index\": 6,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_Counters_AvgThreads/threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgThreads(Results const& e) { CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2); } CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int", &CheckAvgThreads); } // end namespace // ========================================================================= // // ---------------------- ThreadAvg Counters Output ------------------------ // // ========================================================================= // namespace { void 
BM_Counters_AvgThreadsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreadsRate}; } BENCHMARK(BM_Counters_AvgThreadsRate)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreadsRate/threads:%int " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"}, {"\"family_index\": 7,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgThreadsRate/" "threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgThreadsRate(Results const& e) { // this (and not real time) is the time used double t = e.DurationCPUTime() / e.NumThreads(); CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / t, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
/ t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreadsRate/threads:%int", &CheckAvgThreadsRate); } // end namespace // ========================================================================= // // ------------------- IterationInvariant Counters Output ------------------ // // ========================================================================= // namespace { void BM_Counters_IterationInvariant(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsIterationInvariant}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsIterationInvariant}; } BENCHMARK(BM_Counters_IterationInvariant); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_IterationInvariant\",$"}, {"\"family_index\": 8,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_IterationInvariant\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_IterationInvariant\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckIterationInvariant(Results const& e) { double its = e.NumIterations(); // check that the values are within 0.1% of the expected value CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
* its, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant", &CheckIterationInvariant); } // end namespace // ========================================================================= // // ----------------- IterationInvariantRate Counters Output ---------------- // // ========================================================================= // namespace { void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsIterationInvariantRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kIsIterationInvariant}; } BENCHMARK(BM_Counters_kIsIterationInvariantRate); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kIsIterationInvariantRate\",$"}, {"\"family_index\": 9,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kIsIterationInvariantRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_kIsIterationInvariantRate\",%csv_report," "%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckIsIterationInvariantRate(Results const& e) { double its = e.NumIterations(); double t = e.DurationCPUTime(); // this (and not 
real time) is the time used // check that the values are within 0.1% of the expected values CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its * 1. / t, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, its * 2. / t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_kIsIterationInvariantRate", &CheckIsIterationInvariantRate); } // end namespace // ========================================================================= // // --------------------- AvgIterations Counters Output --------------------- // // ========================================================================= // namespace { void BM_Counters_AvgIterations(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterations}; state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgIterations}; } BENCHMARK(BM_Counters_AvgIterations); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgIterations\",$"}, {"\"family_index\": 10,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgIterations\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgIterations\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgIterations(Results const& e) { double its = e.NumIterations(); // check that the values are within 0.1% of the expected value CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. 
/ its, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / its, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations); } // end namespace // ========================================================================= // // ------------------- AvgIterationsRate Counters Output ------------------- // // ========================================================================= // namespace { void BM_Counters_kAvgIterationsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kAvgIterations}; } BENCHMARK(BM_Counters_kAvgIterationsRate); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kAvgIterationsRate\",$"}, {"\"family_index\": 11,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kAvgIterationsRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_kAvgIterationsRate\",%csv_report," "%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgIterationsRate(Results const& e) { double its = e.NumIterations(); double t = e.DurationCPUTime(); // this (and not real time) is 
the time used // check that the values are within 0.1% of the expected values CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / its / t, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / its / t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_kAvgIterationsRate", &CheckAvgIterationsRate); } // end namespace // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/user_counters_thousands_test.cc ================================================ #undef NDEBUG #include "benchmark/benchmark_api.h" #include "benchmark/counter.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "output_test.h" namespace { // ========================================================================= // // ------------------------ Thousands Customisation ------------------------ // // ========================================================================= // void BM_Counters_Thousands(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters.insert({ {"t0_1000000DefaultBase", bm::Counter(1000 * 1000, bm::Counter::kDefaults)}, {"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults, bm::Counter::OneK::kIs1000)}, {"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults, bm::Counter::OneK::kIs1024)}, {"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults, bm::Counter::OneK::kIs1000)}, {"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults, bm::Counter::OneK::kIs1024)}, }); } BENCHMARK(BM_Counters_Thousands)->Repetitions(2); ADD_CASES( TC_ConsoleOut, { {"^BM_Counters_Thousands/repeats:2 %console_report " 
"t0_1000000DefaultBase=1M " "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki " "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2 %console_report " "t0_1000000DefaultBase=1M " "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki " "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_mean %console_report " "t0_1000000DefaultBase=1M t1_1000000Base1000=1M " "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M " "t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_median %console_report " "t0_1000000DefaultBase=1M t1_1000000Base1000=1M " "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M " "t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ " "]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 " "t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"}, }); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", 
MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_mean\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_median\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", 
MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_stddev\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"t0_1000000DefaultBase\": 0\\.(0)*e\\+(0)*,$", MR_Next}, {"\"t1_1000000Base1000\": 0\\.(0)*e\\+(0)*,$", MR_Next}, {"\"t2_1000000Base1024\": 0\\.(0)*e\\+(0)*,$", MR_Next}, {"\"t3_1048576Base1000\": 0\\.(0)*e\\+(0)*,$", MR_Next}, {"\"t4_1048576Base1024\": 0\\.(0)*e\\+(0)*$", MR_Next}, {"}", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_Counters_Thousands/" "repeats:2\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\.04858e\\+(" "0)*6,1\\.04858e\\+(0)*6$"}, {"^\"BM_Counters_Thousands/" "repeats:2\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\.04858e\\+(" "0)*6,1\\.04858e\\+(0)*6$"}, {"^\"BM_Counters_Thousands/" "repeats:2_mean\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\." "04858e\\+(0)*6,1\\.04858e\\+(0)*6$"}, {"^\"BM_Counters_Thousands/" "repeats:2_median\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\." 
"04858e\\+(0)*6,1\\.04858e\\+(0)*6$"}, {"^\"BM_Counters_Thousands/repeats:2_stddev\",%csv_report,0,0,0,0,0$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckThousands(Results const& e) { if (e.name != "BM_Counters_Thousands/repeats:2") { return; // Do not check the aggregates! } // check that the values are within 0.01% of the expected values CHECK_FLOAT_COUNTER_VALUE(e, "t0_1000000DefaultBase", EQ, 1000 * 1000, 0.0001); CHECK_FLOAT_COUNTER_VALUE(e, "t1_1000000Base1000", EQ, 1000 * 1000, 0.0001); CHECK_FLOAT_COUNTER_VALUE(e, "t2_1000000Base1024", EQ, 1000 * 1000, 0.0001); CHECK_FLOAT_COUNTER_VALUE(e, "t3_1048576Base1000", EQ, 1024 * 1024, 0.0001); CHECK_FLOAT_COUNTER_VALUE(e, "t4_1048576Base1024", EQ, 1024 * 1024, 0.0001); } CHECK_BENCHMARK_RESULTS("BM_Counters_Thousands", &CheckThousands); } // end namespace // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // int main(int argc, char* argv[]) { benchmark::MaybeReenterWithoutASLR(argc, argv); RunOutputTests(argc, argv); } ================================================ FILE: test/user_counters_threads_test.cc ================================================ #undef NDEBUG #include "benchmark/benchmark_api.h" #include "benchmark/registration.h" #include "benchmark/state.h" #include "benchmark/utils.h" #include "output_test.h" // ========================================================================= // // ---------------------- Testing Prologue Output -------------------------- // // ========================================================================= // // clang-format off ADD_CASES(TC_ConsoleOut, {{"^[-]+$", MR_Next}, {"^Benchmark %s Time %s CPU %s Iterations UserCounters...$", MR_Next}, {"^[-]+$", MR_Next}}); ADD_CASES(TC_CSVOut, 
{{"%csv_header,\"bar\",\"foo\""}}); // clang-format on // ========================================================================= // // ------------------------- Simple Counters Output ------------------------ // // ========================================================================= // namespace { void BM_Counters_Simple(benchmark::State& state) { for (auto _ : state) { } state.counters["foo"] = 1; state.counters["bar"] = 2 * static_cast(state.iterations()); } BENCHMARK(BM_Counters_Simple)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Simple/threads:%int %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple/threads:%int\",$"}, {"\"family_index\": 0,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Simple/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_Counters_Simple/threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckSimple(Results const& e) { double its = e.NumIterations(); CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1 * e.NumThreads()); // check that the value of bar is within 0.1% of the expected value CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
* its, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_Simple/threads:%int", &CheckSimple); } // end namespace // ========================================================================= // // --------------------- Counters+Items+Bytes/s Output --------------------- // // ========================================================================= // namespace { void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } state.counters["foo"] = 1; state.SetBytesProcessed(364); state.SetItemsProcessed(150); } BENCHMARK(BM_Counters_WithBytesAndItemsPSec)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec/threads:%int %console_report " "bytes_per_second=%hrfloat/s " "foo=%hrfloat items_per_second=%hrfloat/s$"}}); ADD_CASES( TC_JSONOut, {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec/threads:%int\",$"}, {"\"family_index\": 1,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_WithBytesAndItemsPSec/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bytes_per_second\": %float,$", MR_Next}, {"\"foo\": %float,$", MR_Next}, {"\"items_per_second\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_WithBytesAndItemsPSec/threads:%int\"," "%csv_bytes_items_report,,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckBytesAndItemsPSec(Results const& e) { // this (and not real time) is the time used double t = 
e.DurationCPUTime() / e.NumThreads(); CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1 * e.NumThreads()); // check that the values are within 0.1% of the expected values CHECK_FLOAT_RESULT_VALUE(e, "bytes_per_second", EQ, (364. * e.NumThreads()) / t, 0.001); CHECK_FLOAT_RESULT_VALUE(e, "items_per_second", EQ, (150. * e.NumThreads()) / t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec/threads:%int", &CheckBytesAndItemsPSec); } // end namespace // ========================================================================= // // ------------------------- Rate Counters Output -------------------------- // // ========================================================================= // namespace { void BM_Counters_Rate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate}; } BENCHMARK(BM_Counters_Rate)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Rate/threads:%int %console_report " "bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate/threads:%int\",$"}, {"\"family_index\": 2,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Rate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Rate/threads:%int\",%csv_report,%float,%float$"}}); // VS2013 
does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckRate(Results const& e) { // this (and not real time) is the time used double t = e.DurationCPUTime() / e.NumThreads(); // check that the values are within 0.1% of the expected values CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, (1. * e.NumThreads()) / t, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, (2. * e.NumThreads()) / t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_Rate/threads:%int", &CheckRate); } // end namespace // ========================================================================= // // ----------------------- Inverted Counters Output ------------------------ // // ========================================================================= // namespace { void BM_Invert(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert}; state.counters["bar"] = bm::Counter{10000, bm::Counter::kInvert}; } BENCHMARK(BM_Invert)->ThreadRange(1, 8); ADD_CASES( TC_ConsoleOut, {{"^BM_Invert/threads:%int %console_report bar=%hrfloatu foo=%hrfloatk$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Invert/threads:%int\",$"}, {"\"family_index\": 3,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Invert/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, 
{{"^\"BM_Invert/threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckInvert(Results const& e) { CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / (0.0001 * e.NumThreads()), 0.0001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 1. / (10000 * e.NumThreads()), 0.0001); } CHECK_BENCHMARK_RESULTS("BM_Invert/threads:%int", &CheckInvert); } // end namespace // ========================================================================= // // --------------------- InvertedRate Counters Output ---------------------- // // ========================================================================= // namespace { void BM_Counters_InvertedRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate | bm::Counter::kInvert}; state.counters["bar"] = bm::Counter{8192, bm::Counter::kIsRate | bm::Counter::kInvert}; } BENCHMARK(BM_Counters_InvertedRate)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate/threads:%int %console_report " "bar=%hrfloats foo=%hrfloats$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_InvertedRate/threads:%int\",$"}, {"\"family_index\": 4,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_InvertedRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, 
{{"^\"BM_Counters_InvertedRate/" "threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckInvertedRate(Results const& e) { // this (and not real time) is the time used double t = e.DurationCPUTime() / e.NumThreads(); // check that the values are within 0.1% of the expected values CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, t / (e.NumThreads()), 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, t / (8192.0 * e.NumThreads()), 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_InvertedRate/threads:%int", &CheckInvertedRate); } // end namespace // ========================================================================= // // ------------------------- Thread Counters Output ------------------------ // // ========================================================================= // namespace { void BM_Counters_Threads(benchmark::State& state) { for (auto _ : state) { } state.counters["foo"] = 1; state.counters["bar"] = 2; } BENCHMARK(BM_Counters_Threads)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"}, {"\"family_index\": 5,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_Counters_Threads/threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to 
CHECK_BENCHMARK_RESULTS() void CheckThreads(Results const& e) { CHECK_COUNTER_VALUE(e, int, "foo", EQ, e.NumThreads()); CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2 * e.NumThreads()); } CHECK_BENCHMARK_RESULTS("BM_Counters_Threads/threads:%int", &CheckThreads); } // end namespace // ========================================================================= // // ---------------------- ThreadAvg Counters Output ------------------------ // // ========================================================================= // namespace { void BM_Counters_AvgThreads(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreads}; state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreads}; } BENCHMARK(BM_Counters_AvgThreads)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int " "%console_report bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"}, {"\"family_index\": 6,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_Counters_AvgThreads/threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgThreads(Results const& e) { CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2); } CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int", &CheckAvgThreads); } // end namespace // 
========================================================================= // // ---------------------- ThreadAvg Counters Output ------------------------ // // ========================================================================= // namespace { void BM_Counters_AvgThreadsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreadsRate}; } BENCHMARK(BM_Counters_AvgThreadsRate)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreadsRate/threads:%int " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"}, {"\"family_index\": 7,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgThreadsRate/" "threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgThreadsRate(Results const& e) { // this (and not real time) is the time used double t = e.DurationCPUTime() / e.NumThreads(); CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / t, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
/ t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreadsRate/threads:%int", &CheckAvgThreadsRate); } // end namespace // ========================================================================= // // ------------------- IterationInvariant Counters Output ------------------ // // ========================================================================= // namespace { void BM_Counters_IterationInvariant(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsIterationInvariant}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsIterationInvariant}; } BENCHMARK(BM_Counters_IterationInvariant)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant/threads:%int %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_IterationInvariant/threads:%int\",$"}, {"\"family_index\": 8,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_IterationInvariant/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_IterationInvariant/" "threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckIterationInvariant(Results const& e) { double its = e.NumIterations(); // check that the values are within 0.1% of the expected value CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its * e.NumThreads(), 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
* its * e.NumThreads(), 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant/threads:%int", &CheckIterationInvariant); } // end namespace // ========================================================================= // // ----------------- IterationInvariantRate Counters Output ---------------- // // ========================================================================= // namespace { void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsIterationInvariantRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kIsIterationInvariant}; } BENCHMARK(BM_Counters_kIsIterationInvariantRate)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate/threads:%int " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES( TC_JSONOut, {{"\"name\": \"BM_Counters_kIsIterationInvariantRate/threads:%int\",$"}, {"\"family_index\": 9,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kIsIterationInvariantRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES( TC_CSVOut, {{"^\"BM_Counters_kIsIterationInvariantRate/threads:%int\",%csv_report," "%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void 
CheckIsIterationInvariantRate(Results const& e) { double its = e.NumIterations(); // this (and not real time) is the time used double t = e.DurationCPUTime() / e.NumThreads(); // check that the values are within 0.1% of the expected values CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its * 1. * e.NumThreads() / t, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, its * 2. * e.NumThreads() / t, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_kIsIterationInvariantRate/threads:%int", &CheckIsIterationInvariantRate); } // end namespace // ========================================================================= // // --------------------- AvgIterations Counters Output --------------------- // // ========================================================================= // namespace { void BM_Counters_AvgIterations(benchmark::State& state) { for (auto _ : state) { } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterations}; state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgIterations}; } BENCHMARK(BM_Counters_AvgIterations)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations/threads:%int %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgIterations/threads:%int\",$"}, {"\"family_index\": 10,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgIterations/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgIterations/" "threads:%int\",%csv_report,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda 
argument // to CHECK_BENCHMARK_RESULTS() void CheckAvgIterations(Results const& e) { double its = e.NumIterations(); // check that the values are within 0.1% of the expected value CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. * e.NumThreads() / its, 0.001); CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. * e.NumThreads() / its, 0.001); } CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations/threads:%int", &CheckAvgIterations); } // end namespace // ========================================================================= // // ------------------- AvgIterationsRate Counters Output ------------------- // // ========================================================================= // namespace { void BM_Counters_kAvgIterationsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero auto iterations = static_cast(state.iterations()) * static_cast(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate}; state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kAvgIterations}; } BENCHMARK(BM_Counters_kAvgIterationsRate)->ThreadRange(1, 8); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate/threads:%int " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kAvgIterationsRate/threads:%int\",$"}, {"\"family_index\": 11,$", MR_Next}, {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kAvgIterationsRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": %int,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, {"\"time_unit\": \"ns\",$", MR_Next}, {"\"bar\": %float,$", MR_Next}, {"\"foo\": %float$", MR_Next}, {"}", MR_Next}}); 
def check_inputs(in1, in2, flags):
    """
    Sanity-check the two inputs and the pass-through benchmark flags.

    Prints a warning when '--benchmark_out=' would be handed to two
    executables, prints one warning per flag returned by
    ``util.remove_benchmark_flags("--benchmark_filter=", flags)`` when both
    inputs are JSON, and exits the process with status 1 when
    '--benchmark_out_format=' is given with a value other than "json".
    """
    first_kind, _ = util.classify_input_file(in1)
    second_kind, _ = util.classify_input_file(in2)
    out_path = util.find_benchmark_flag("--benchmark_out=", flags)
    out_format = util.find_benchmark_flag("--benchmark_out_format=", flags)
    kinds = (first_kind, second_kind)

    both_executables = all(kind == util.IT_Executable for kind in kinds)
    if both_executables and out_path:
        overwrite_warning = (
            "WARNING: '--benchmark_out=%s' will be passed to both "
            "benchmarks causing it to be overwritten"
        )
        print(overwrite_warning % out_path)

    if all(kind == util.IT_JSON for kind in kinds):
        # With JSON on both sides nothing is executed, so the only flag
        # that still matters is --benchmark_filter=
        ignored_flags = util.remove_benchmark_flags(
            "--benchmark_filter=", flags
        )
        for ignored in ignored_flags:
            print(
                "WARNING: passing %s has no effect since both inputs are JSON"
                % ignored
            )

    if out_format is None or out_format == "json":
        return

    unsupported_format = (
        "ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
        " is not supported."
    )
    print(unsupported_format % out_format)
    sys.exit(1)


def create_parser():
    """
    Build and return the command-line parser for the compare tool.

    The parser carries the global display / U-test options and three
    sub-commands stored in ``mode``: ``benchmarks``, ``filters`` and
    ``benchmarksfiltered``. Every sub-command ends with a catch-all
    ``benchmark_options`` positional.
    """
    input_file_help = "A benchmark executable or JSON output file"
    baseline_filter_help = "The first filter, that will be used as baseline"
    contender_filter_help = (
        "The second filter, that will be compared against the baseline"
    )
    contender_description = (
        "The benchmark that will be compared against the baseline"
    )

    def add_input_file(group, dest, help_text=input_file_help):
        # Positional naming a benchmark executable or a JSON result file.
        group.add_argument(
            dest,
            metavar=dest,
            type=argparse.FileType("r"),
            nargs=1,
            help=help_text,
        )

    def add_filter(group, dest, help_text):
        # Positional carrying a benchmark filter string.
        group.add_argument(
            dest, metavar=dest, type=str, nargs=1, help=help_text
        )

    def add_benchmark_options(subparser):
        # Everything left on the command line is kept for the executables.
        subparser.add_argument(
            "benchmark_options",
            metavar="benchmark_options",
            nargs=argparse.REMAINDER,
            help="Arguments to pass when running benchmark executables",
        )

    parser = ArgumentParser(
        description="versatile benchmark output compare tool"
    )

    # ---- global options -------------------------------------------------
    parser.add_argument(
        "-a",
        "--display_aggregates_only",
        dest="display_aggregates_only",
        action="store_true",
        help=(
            "If there are repetitions, by default, we display everything - "
            "the actual runs, and the aggregates computed. "
            "Sometimes, it is desirable to only view the aggregates. "
            "E.g. when there are a lot of repetitions. "
            "Do note that only the display is affected. "
            "Internally, all the actual runs are still used, e.g. for U test."
        ),
    )
    parser.add_argument(
        "--no-color",
        dest="color",
        default=True,
        action="store_false",
        help="Do not use colors in the terminal output",
    )
    parser.add_argument(
        "-d",
        "--dump_to_json",
        dest="dump_to_json",
        help=(
            "Additionally, dump benchmark comparison output to this file in "
            "JSON format."
        ),
    )

    # ---- U-test options -------------------------------------------------
    optimal_repetitions = report.UTEST_OPTIMAL_REPETITIONS
    min_repetitions = report.UTEST_MIN_REPETITIONS
    utest_group = parser.add_argument_group()
    utest_group.add_argument(
        "--no-utest",
        dest="utest",
        default=True,
        action="store_false",
        help=(
            "The tool can do a two-tailed Mann-Whitney U test with the null "
            "hypothesis that it is equally likely that a randomly selected "
            "value from one sample will be less than or greater than a "
            "randomly selected value from a second sample.\n"
            f"WARNING: requires **LARGE** (no less than {optimal_repetitions}) "
            "number of repetitions to be meaningful!\n"
            f"The test is being done by default, if at least {min_repetitions} "
            "repetitions were done.\n"
            "This option can disable the U Test."
        ),
    )
    alpha_default = 0.05
    alpha_help = (
        "significance level alpha. "
        "if the calculated p-value is below this value, "
        "then the result is said to be statistically significant "
        "and the null hypothesis is rejected.\n"
        "(default: %0.4f)"
    ) % alpha_default
    utest_group.add_argument(
        "--alpha",
        dest="utest_alpha",
        default=alpha_default,
        type=float,
        help=alpha_help,
    )

    subparsers = parser.add_subparsers(
        help="This tool has multiple modes of operation:", dest="mode"
    )

    # ---- mode: benchmarks -----------------------------------------------
    benchmarks_mode = subparsers.add_parser(
        "benchmarks",
        help=(
            "The most simple use-case, "
            "compare all the output of these two benchmarks"
        ),
    )
    baseline = benchmarks_mode.add_argument_group(
        "baseline", "The benchmark baseline"
    )
    add_input_file(baseline, "test_baseline")
    contender = benchmarks_mode.add_argument_group(
        "contender", contender_description
    )
    add_input_file(contender, "test_contender")
    add_benchmark_options(benchmarks_mode)

    # ---- mode: filters --------------------------------------------------
    filters_mode = subparsers.add_parser(
        "filters", help="Compare filter one with the filter two of benchmark"
    )
    baseline = filters_mode.add_argument_group(
        "baseline", "The benchmark baseline"
    )
    add_input_file(baseline, "test")
    add_filter(baseline, "filter_baseline", baseline_filter_help)
    contender = filters_mode.add_argument_group(
        "contender", contender_description
    )
    add_filter(contender, "filter_contender", contender_filter_help)
    add_benchmark_options(filters_mode)

    # ---- mode: benchmarksfiltered ---------------------------------------
    filtered_mode = subparsers.add_parser(
        "benchmarksfiltered",
        help=(
            "Compare filter one of first benchmark with filter two of the "
            "second benchmark"
        ),
    )
    baseline = filtered_mode.add_argument_group(
        "baseline", "The benchmark baseline"
    )
    add_input_file(baseline, "test_baseline")
    add_filter(baseline, "filter_baseline", baseline_filter_help)
    contender = filtered_mode.add_argument_group(
        "contender", contender_description
    )
    add_input_file(
        contender,
        "test_contender",
        (
            "The second benchmark executable or JSON output file, that will "
            "be compared against the baseline"
        ),
    )
    add_filter(contender, "filter_contender", contender_filter_help)
    add_benchmark_options(filtered_mode)

    return parser
def main():
    """
    Run the comparison selected on the command line and print the report.

    The arguments are parsed with the parser built by create_parser(). For
    the selected mode ('benchmarks', 'filters' or 'benchmarksfiltered') the
    baseline/contender inputs and filters are resolved, both benchmarks are
    run (or loaded from JSON), the difference report is printed and, when
    --dump_to_json was given, also written to that file as JSON.

    The process exits with a non-zero status when no mode was selected, when
    unrecognized arguments are left over, or when the mode is unknown.
    """
    # Parse the command line flags
    parser = create_parser()
    args, unknown_args = parser.parse_known_args()
    if args.mode is None:
        parser.print_help()
        # ArgumentParser.exit() is always available, unlike the `exit`
        # builtin which is only injected by the `site` module.
        parser.exit(1)
    if unknown_args:
        # An `assert` would be stripped under `python -O`; report the problem
        # the same way argparse itself does (usage message, exit status 2).
        parser.error("unrecognized arguments: %s" % " ".join(unknown_args))

    # Work on a copy so that the parsed namespace is left untouched when
    # extra flags are appended below.
    benchmark_options = list(args.benchmark_options)

    if args.mode == "benchmarks":
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = ""
        filter_contender = ""

        # NOTE: if test_baseline == test_contender, you are analyzing the stdev

        description = f"Comparing {test_baseline} to {test_contender}"
    elif args.mode == "filters":
        test_baseline = args.test[0].name
        test_contender = args.test[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if filter_baseline == filter_contender, you are analyzing the
        # stdev

        description = (
            f"Comparing {filter_baseline} to {filter_contender}"
            f" (from {args.test[0].name})"
        )
    elif args.mode == "benchmarksfiltered":
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if test_baseline == test_contender and
        # filter_baseline == filter_contender, you are analyzing the stdev

        description = (
            f"Comparing {filter_baseline} (from {test_baseline})"
            f" to {filter_contender} (from {test_contender})"
        )
    else:
        # should never happen
        print("Unrecognized mode of operation: '%s'" % args.mode)
        parser.print_help()
        parser.exit(1)

    # The inputs are validated against the user-supplied options only, i.e.
    # before any option is added by this tool.
    check_inputs(test_baseline, test_contender, benchmark_options)

    if args.display_aggregates_only:
        benchmark_options.append("--benchmark_display_aggregates_only=true")

    options_baseline = []
    options_contender = []

    # Filters are only forwarded when both of them are non-empty.
    have_filters = bool(filter_baseline and filter_contender)
    if have_filters:
        options_baseline = ["--benchmark_filter=%s" % filter_baseline]
        options_contender = ["--benchmark_filter=%s" % filter_contender]

    # Run the benchmarks and report the results
    json1_orig = gbench.util.sort_benchmark_results(
        gbench.util.run_or_load_benchmark(
            test_baseline, benchmark_options + options_baseline
        )
    )
    json2_orig = gbench.util.sort_benchmark_results(
        gbench.util.run_or_load_benchmark(
            test_contender, benchmark_options + options_contender
        )
    )
    json1 = json1_orig
    json2 = json2_orig

    # Now, filter the benchmarks so that the difference report can work:
    # both families are renamed to one common name so that they pair up.
    if have_filters:
        replacement = "[%s vs. %s]" % (filter_baseline, filter_contender)
        json1 = gbench.report.filter_benchmark(
            json1_orig, filter_baseline, replacement
        )
        json2 = gbench.report.filter_benchmark(
            json2_orig, filter_contender, replacement
        )

    diff_report = gbench.report.get_difference_report(json1, json2, args.utest)
    output_lines = gbench.report.print_difference_report(
        diff_report,
        args.display_aggregates_only,
        args.utest,
        args.utest_alpha,
        args.color,
    )
    print(description)
    for ln in output_lines:
        print(ln)

    # Optionally, diff and output to JSON
    if args.dump_to_json is not None:
        with open(args.dump_to_json, "w") as f_json:
            json.dump(diff_report, f_json, indent=1)
self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_display_aggregates_only(self): parsed = self.parser.parse_args( ["-a", "benchmarks", self.testInput0, self.testInput1] ) self.assertTrue(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_with_utest_alpha(self): parsed = self.parser.parse_args( ["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest_with_utest_alpha(self): parsed = self.parser.parse_args( [ "--no-utest", "--alpha=0.314", "benchmarks", self.testInput0, self.testInput1, ] ) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_with_remainder(self): parsed = self.parser.parse_args( ["benchmarks", self.testInput0, self.testInput1, "d"] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, 
self.testInput1) self.assertEqual(parsed.benchmark_options, ["d"]) def test_benchmarks_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( ["benchmarks", self.testInput0, self.testInput1, "--", "e"] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertEqual(parsed.benchmark_options, ["e"]) def test_filters_basic(self): parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"]) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.filter_contender[0], "d") self.assertFalse(parsed.benchmark_options) def test_filters_with_remainder(self): parsed = self.parser.parse_args( ["filters", self.testInput0, "c", "d", "e"] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.filter_contender[0], "d") self.assertEqual(parsed.benchmark_options, ["e"]) def test_filters_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( ["filters", self.testInput0, "c", "d", "--", "f"] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.filter_contender[0], "d") self.assertEqual(parsed.benchmark_options, ["f"]) def test_benchmarksfiltered_basic(self): parsed = self.parser.parse_args( ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"] ) 
self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertEqual(parsed.filter_contender[0], "e") self.assertFalse(parsed.benchmark_options) def test_benchmarksfiltered_with_remainder(self): parsed = self.parser.parse_args( [ "benchmarksfiltered", self.testInput0, "c", self.testInput1, "e", "f", ] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertEqual(parsed.filter_contender[0], "e") self.assertEqual(parsed.benchmark_options[0], "f") def test_benchmarksfiltered_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( [ "benchmarksfiltered", self.testInput0, "c", self.testInput1, "e", "--", "g", ] ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertEqual(parsed.filter_contender[0], "e") self.assertEqual(parsed.benchmark_options[0], "g") if __name__ == "__main__": # unittest.main() main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; # kate: indent-mode python; remove-trailing-spaces modified; ================================================ FILE: tools/gbench/Inputs/test1_run1.json ================================================ { "context": { "date": "2016-08-02 17:44:46", 
"num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_SameTimes", "iterations": 1000, "real_time": 10, "cpu_time": 10, "time_unit": "ns" }, { "name": "BM_2xFaster", "iterations": 1000, "real_time": 50, "cpu_time": 50, "time_unit": "ns" }, { "name": "BM_2xSlower", "iterations": 1000, "real_time": 50, "cpu_time": 50, "time_unit": "ns" }, { "name": "BM_1PercentFaster", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_1PercentSlower", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_10PercentFaster", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_10PercentSlower", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_100xSlower", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_100xFaster", "iterations": 1000, "real_time": 10000, "cpu_time": 10000, "time_unit": "ns" }, { "name": "BM_10PercentCPUToTime", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_ThirdFaster", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "MyComplexityTest_BigO", "run_name": "MyComplexityTest", "run_type": "aggregate", "aggregate_name": "BigO", "cpu_coefficient": 4.2749856294592886e+00, "real_coefficient": 6.4789275289789780e+00, "big_o": "N", "time_unit": "ns" }, { "name": "MyComplexityTest_RMS", "run_name": "MyComplexityTest", "run_type": "aggregate", "aggregate_name": "RMS", "rms": 4.5097802512472874e-03 }, { "name": "BM_NotBadTimeUnit", "iterations": 1000, "real_time": 0.4, "cpu_time": 0.5, "time_unit": "s" }, { "name": "BM_DifferentTimeUnit", "iterations": 1, "real_time": 1, "cpu_time": 1, "time_unit": "s" }, { "name": "BM_hasLabel", "label": "a label", "iterations": 1, "real_time": 1, "cpu_time": 1, "time_unit": "s" } ] } 
================================================ FILE: tools/gbench/Inputs/test1_run2.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_SameTimes", "iterations": 1000, "real_time": 10, "cpu_time": 10, "time_unit": "ns" }, { "name": "BM_2xFaster", "iterations": 1000, "real_time": 25, "cpu_time": 25, "time_unit": "ns" }, { "name": "BM_2xSlower", "iterations": 20833333, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_1PercentFaster", "iterations": 1000, "real_time": 98.9999999, "cpu_time": 98.9999999, "time_unit": "ns" }, { "name": "BM_1PercentSlower", "iterations": 1000, "real_time": 100.9999999, "cpu_time": 100.9999999, "time_unit": "ns" }, { "name": "BM_10PercentFaster", "iterations": 1000, "real_time": 90, "cpu_time": 90, "time_unit": "ns" }, { "name": "BM_10PercentSlower", "iterations": 1000, "real_time": 110, "cpu_time": 110, "time_unit": "ns" }, { "name": "BM_100xSlower", "iterations": 1000, "real_time": 1.0000e+04, "cpu_time": 1.0000e+04, "time_unit": "ns" }, { "name": "BM_100xFaster", "iterations": 1000, "real_time": 100, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_10PercentCPUToTime", "iterations": 1000, "real_time": 110, "cpu_time": 90, "time_unit": "ns" }, { "name": "BM_ThirdFaster", "iterations": 1000, "real_time": 66.665, "cpu_time": 66.664, "time_unit": "ns" }, { "name": "MyComplexityTest_BigO", "run_name": "MyComplexityTest", "run_type": "aggregate", "aggregate_name": "BigO", "cpu_coefficient": 5.6215779594361486e+00, "real_coefficient": 5.6288314793554610e+00, "big_o": "N", "time_unit": "ns" }, { "name": "MyComplexityTest_RMS", "run_name": "MyComplexityTest", "run_type": "aggregate", "aggregate_name": "RMS", "rms": 3.3128901852342174e-03 }, { "name": "BM_NotBadTimeUnit", "iterations": 1000, "real_time": 0.04, "cpu_time": 0.6, "time_unit": "s" 
}, { "name": "BM_DifferentTimeUnit", "iterations": 1, "real_time": 1, "cpu_time": 1, "time_unit": "ns" }, { "name": "BM_hasLabel", "label": "a label", "iterations": 1, "real_time": 1, "cpu_time": 1, "time_unit": "s" } ] } ================================================ FILE: tools/gbench/Inputs/test2_run.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_Hi", "iterations": 1234, "real_time": 42, "cpu_time": 24, "time_unit": "ms" }, { "name": "BM_Zero", "iterations": 1000, "real_time": 10, "cpu_time": 10, "time_unit": "ns" }, { "name": "BM_Zero/4", "iterations": 4000, "real_time": 40, "cpu_time": 40, "time_unit": "ns" }, { "name": "Prefix/BM_Zero", "iterations": 2000, "real_time": 20, "cpu_time": 20, "time_unit": "ns" }, { "name": "Prefix/BM_Zero/3", "iterations": 3000, "real_time": 30, "cpu_time": 30, "time_unit": "ns" }, { "name": "BM_One", "iterations": 5000, "real_time": 5, "cpu_time": 5, "time_unit": "ns" }, { "name": "BM_One/4", "iterations": 2000, "real_time": 20, "cpu_time": 20, "time_unit": "ns" }, { "name": "Prefix/BM_One", "iterations": 1000, "real_time": 10, "cpu_time": 10, "time_unit": "ns" }, { "name": "Prefix/BM_One/3", "iterations": 1500, "real_time": 15, "cpu_time": 15, "time_unit": "ns" }, { "name": "BM_Bye", "iterations": 5321, "real_time": 11, "cpu_time": 63, "time_unit": "ns" } ] } ================================================ FILE: tools/gbench/Inputs/test3_run0.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_One", "run_type": "aggregate", "iterations": 1000, "real_time": 10, "cpu_time": 100, "time_unit": "ns" }, { "name": "BM_Two", "iterations": 1000, "real_time": 9, "cpu_time": 90, 
"time_unit": "ns" }, { "name": "BM_Two", "iterations": 1000, "real_time": 8, "cpu_time": 86, "time_unit": "ns" }, { "name": "short", "run_type": "aggregate", "iterations": 1000, "real_time": 8, "cpu_time": 80, "time_unit": "ns" }, { "name": "short", "run_type": "aggregate", "iterations": 1000, "real_time": 8, "cpu_time": 77, "time_unit": "ns" }, { "name": "medium", "run_type": "iteration", "iterations": 1000, "real_time": 8, "cpu_time": 80, "time_unit": "ns" }, { "name": "medium", "run_type": "iteration", "iterations": 1000, "real_time": 9, "cpu_time": 82, "time_unit": "ns" } ] } ================================================ FILE: tools/gbench/Inputs/test3_run1.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_One", "iterations": 1000, "real_time": 9, "cpu_time": 110, "time_unit": "ns" }, { "name": "BM_Two", "run_type": "aggregate", "iterations": 1000, "real_time": 10, "cpu_time": 89, "time_unit": "ns" }, { "name": "BM_Two", "iterations": 1000, "real_time": 7, "cpu_time": 72, "time_unit": "ns" }, { "name": "short", "run_type": "aggregate", "iterations": 1000, "real_time": 7, "cpu_time": 75, "time_unit": "ns" }, { "name": "short", "run_type": "aggregate", "iterations": 762, "real_time": 4.54, "cpu_time": 66.6, "time_unit": "ns" }, { "name": "short", "run_type": "iteration", "iterations": 1000, "real_time": 800, "cpu_time": 1, "time_unit": "ns" }, { "name": "medium", "run_type": "iteration", "iterations": 1200, "real_time": 5, "cpu_time": 53, "time_unit": "ns" } ] } ================================================ FILE: tools/gbench/Inputs/test4_run.json ================================================ { "benchmarks": [ { "name": "99 family 0 instance 0 repetition 0", "run_type": "iteration", "family_index": 0, "per_family_instance_index": 0, "repetition_index": 0 }, { "name": "98 
family 0 instance 0 repetition 1", "run_type": "iteration", "family_index": 0, "per_family_instance_index": 0, "repetition_index": 1 }, { "name": "97 family 0 instance 0 aggregate", "run_type": "aggregate", "family_index": 0, "per_family_instance_index": 0, "aggregate_name": "9 aggregate" }, { "name": "96 family 0 instance 1 repetition 0", "run_type": "iteration", "family_index": 0, "per_family_instance_index": 1, "repetition_index": 0 }, { "name": "95 family 0 instance 1 repetition 1", "run_type": "iteration", "family_index": 0, "per_family_instance_index": 1, "repetition_index": 1 }, { "name": "94 family 0 instance 1 aggregate", "run_type": "aggregate", "family_index": 0, "per_family_instance_index": 1, "aggregate_name": "9 aggregate" }, { "name": "93 family 1 instance 0 repetition 0", "run_type": "iteration", "family_index": 1, "per_family_instance_index": 0, "repetition_index": 0 }, { "name": "92 family 1 instance 0 repetition 1", "run_type": "iteration", "family_index": 1, "per_family_instance_index": 0, "repetition_index": 1 }, { "name": "91 family 1 instance 0 aggregate", "run_type": "aggregate", "family_index": 1, "per_family_instance_index": 0, "aggregate_name": "9 aggregate" }, { "name": "90 family 1 instance 1 repetition 0", "run_type": "iteration", "family_index": 1, "per_family_instance_index": 1, "repetition_index": 0 }, { "name": "89 family 1 instance 1 repetition 1", "run_type": "iteration", "family_index": 1, "per_family_instance_index": 1, "repetition_index": 1 }, { "name": "88 family 1 instance 1 aggregate", "run_type": "aggregate", "family_index": 1, "per_family_instance_index": 1, "aggregate_name": "9 aggregate" } ] } ================================================ FILE: tools/gbench/Inputs/test4_run0.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "whocares", 
"run_type": "aggregate", "aggregate_name": "zz", "aggregate_unit": "percentage", "iterations": 1000, "real_time": 0.01, "cpu_time": 0.10, "time_unit": "ns" } ] } ================================================ FILE: tools/gbench/Inputs/test4_run1.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "whocares", "run_type": "aggregate", "aggregate_name": "zz", "aggregate_unit": "percentage", "iterations": 1000, "real_time": 0.005, "cpu_time": 0.15, "time_unit": "ns" } ] } ================================================ FILE: tools/gbench/Inputs/test5_run0.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_ManyRepetitions", "iterations": 1000, "real_time": 1, "cpu_time": 1000, "time_unit": "s" } ] } ================================================ FILE: tools/gbench/Inputs/test5_run1.json ================================================ { "context": { "date": "2016-08-02 17:44:46", "num_cpus": 4, "mhz_per_cpu": 4228, "cpu_scaling_enabled": false, "library_build_type": "release" }, "benchmarks": [ { "name": "BM_ManyRepetitions", "iterations": 1000, "real_time": 1000, "cpu_time": 1, "time_unit": "s" } ] } ================================================ FILE: tools/gbench/__init__.py ================================================ """Google Benchmark tooling""" __author__ = "Eric Fiselier" __email__ = "eric@efcs.ca" __versioninfo__ = (0, 5, 0) __version__ = ".".join(str(v) for v in __versioninfo__) + "dev" __all__ = [] # type: ignore ================================================ FILE: tools/gbench/report.py ================================================ # type: ignore """ report.py - Utilities for reporting 
import copy
import os
import random
import re
import unittest

from numpy import array
from scipy.stats import gmean, mannwhitneyu


class BenchmarkColor:
    """
    A named terminal color, carrying the escape code that enables it.

    Formatting an instance (e.g. via str.format) yields the raw escape code,
    so instances can be passed directly as format arguments.
    """

    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, (self.name, self.code))

    def __format__(self, format_spec):
        # The format spec is ignored on purpose: a color always renders as
        # its escape code.
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor("NONE", "")
BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m")
BC_CYAN = BenchmarkColor("CYAN", "\033[96m")
BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m")
BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m")
BC_HEADER = BenchmarkColor("HEADER", "\033[92m")
BC_WARNING = BenchmarkColor("WARNING", "\033[93m")
BC_WHITE = BenchmarkColor("WHITE", "\033[97m")
BC_FAIL = BenchmarkColor("FAIL", "\033[91m")
BC_ENDC = BenchmarkColor("ENDC", "\033[0m")
BC_BOLD = BenchmarkColor("BOLD", "\033[1m")
BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m")

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"

_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
    "s": 1.0,
    "ms": 1e-3,
    "us": 1e-6,
    "ns": 1e-9,
}


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [
            BC_NONE if isinstance(arg, BenchmarkColor) else arg
            for arg in args
        ]
        kwargs = {
            key: BC_NONE if isinstance(arg, BenchmarkColor) else arg
            for key, arg in kwargs.items()
        }
    return fmt_str.format(*args, **kwargs)


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects. The result is never smaller than 1, even for an
    empty list.
    """
    return max([1] + [len(bc["name"]) for bc in benchmark_list])


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and
    new_val.

    The change is relative to the magnitude of old_val. When old_val is zero
    that is undefined, so the change is taken relative to the mean of the two
    values instead; when both values are zero there is no change at all.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.

    'family' is a regular expression; only benchmarks whose name it matches
    are kept, and every match within the name is substituted with
    'replacement'. The input is not modified, and the result only has the
    'benchmarks' key.
    """
    regex = re.compile(family)
    filtered = {}
    filtered["benchmarks"] = []
    for be in json_orig["benchmarks"]:
        if not regex.search(be["name"]):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench["name"] = regex.sub(replacement, filteredbench["name"])
        filtered["benchmarks"].append(filteredbench)
    return filtered


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for
    benchmarks.
    """
    # dicts preserve insertion order, so this keeps the first occurrence of
    # every name, in order.
    return list(dict.fromkeys(x["name"] for x in json["benchmarks"]))


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering (and any
    duplicates) of the first list.
    """
    try:
        # Constant-time membership tests instead of rescanning list2 for
        # every element of list1.
        lookup = set(list2)
        return [x for x in list1 if x in lookup]
    except TypeError:
        # Unhashable elements: fall back to plain list membership.
        return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    """
    Tell whether the benchmark entry has a time unit and both timings.
    """
    return "time_unit" in x and "real_time" in x and "cpu_time" in x


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)

    Returns a list of [lhs, rhs] pairs, where lhs/rhs are the lists of the
    entries from json1/json2 that have the given name and the time unit of
    the first comparable json1 entry with that name. Names without any
    comparable json1 entry are dropped. rhs may be empty.
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1["benchmarks"]:
            if x["name"] == name and is_potentially_comparable_benchmark(x):
                time_unit = x["time_unit"]
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        # Entries may lack the time unit altogether, hence the .get().
        lhs = [
            x
            for x in json1["benchmarks"]
            if x["name"] == name and x.get("time_unit") == time_unit
        ]
        rhs = [
            x
            for x in json2["benchmarks"]
            if x["name"] == name and x.get("time_unit") == time_unit
        ]
        partitions.append([lhs, rhs])
    return partitions


def get_timedelta_field_as_seconds(benchmark, field_name):
    """
    Get value of field_name field of benchmark, which is time with time unit
    time_unit, as time in seconds. A missing time unit means seconds.

    Raises ValueError if the time unit is not a known one.
    """
    timedelta = benchmark[field_name]
    time_unit = benchmark.get("time_unit", "s")
    multiplier = _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
    if multiplier is None:
        raise ValueError("Unsupported time unit: %r" % (time_unit,))
    return timedelta * multiplier


def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.

    Aggregates, and entries that lack either of the timings, are skipped.
    Returns an array of [real time, cpu time] geomeans, or an empty array
    if there was nothing to average.
    """
    times = []
    for benchmark in json["benchmarks"]:
        if "run_type" in benchmark and benchmark["run_type"] == "aggregate":
            continue
        # Do not crash on entries that carry no timings at all.
        if "real_time" not in benchmark or "cpu_time" not in benchmark:
            continue
        times.append(
            [
                get_timedelta_field_as_seconds(benchmark, "real_time"),
                get_timedelta_field_as_seconds(benchmark, "cpu_time"),
            ]
        )
    return gmean(times) if times else array([])


def extract_field(partition, field_name):
    """
    Return [lhs, rhs], the values of field_name of every entry on each side
    of the partition.
    """
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    """
    Run the two-sided Mann-Whitney U test on the cpu and on the real timings.

    Both arguments are [lhs, rhs] pairs of timing lists. Returns the tuple
    (have_optimal_repetitions, cpu_pvalue, time_pvalue); it is
    (False, None, None) if any of the lists has fewer than
    UTEST_MIN_REPETITIONS values.
    """
    min_rep_cnt = min(
        len(timings_time[0]),
        len(timings_time[1]),
        len(timings_cpu[0]),
        len(timings_cpu[1]),
    )

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative="two-sided"
    ).pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative="two-sided"
    ).pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    """
    Format the U test results of one benchmark.

    Returns a list with the single formatted line, or an empty list if there
    are no U test results to show. A p-value below utest_alpha is colored as
    a success, anything else as a failure.
    """

    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (
        not utest["have_optimal_repetitions"]
        and utest["cpu_pvalue"] is None
        and utest["time_pvalue"] is None
    ):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest["nr_of_repetitions"], utest["nr_of_repetitions_other"]
    )
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest["have_optimal_repetitions"]:
        dsc_color = BC_WARNING
        dsc += (
            f". WARNING: Results unreliable! {UTEST_OPTIMAL_REPETITIONS}+"
            " repetitions recommended."
        )

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [
        color_format(
            use_color,
            special_str,
            BC_HEADER,
            f"{bc_name}{UTEST_COL_NAME}",
            first_col_width,
            get_utest_color(utest["time_pvalue"]),
            utest["time_pvalue"],
            get_utest_color(utest["cpu_pvalue"]),
            utest["cpu_pvalue"],
            dsc_color,
            dsc,
            endc=BC_ENDC,
        )
    ]


def get_difference_report(json1, json2, utest=False):
    """
    Calculate and report the difference between each test of two benchmarks
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.

    Every entry has the keys 'name', 'label', 'measurements', 'time_unit',
    'run_type', 'aggregate_name' and 'utest'. If both inputs have a non-zero
    geomean, a final 'OVERALL_GEOMEAN' entry (in seconds) is appended.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        first = partition[0][0]
        benchmark_name = first["name"]
        label = first.get("label", "")
        time_unit = first["time_unit"]
        utest_results = {}
        # Careful, we may have different repetition count: zip() only pairs
        # up as many repetitions as the shorter side has.
        measurements = [
            {
                "real_time": bn["real_time"],
                "cpu_time": bn["cpu_time"],
                "real_time_other": other_bench["real_time"],
                "cpu_time_other": other_bench["cpu_time"],
                "time": calculate_change(
                    bn["real_time"], other_bench["real_time"]
                ),
                "cpu": calculate_change(
                    bn["cpu_time"], other_bench["cpu_time"]
                ),
            }
            for bn, other_bench in zip(partition[0], partition[1])
        ]

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, "cpu_time")
            timings_time = extract_field(partition, "real_time")
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time
            )
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    "have_optimal_repetitions": have_optimal_repetitions,
                    "cpu_pvalue": cpu_pvalue,
                    "time_pvalue": time_pvalue,
                    "nr_of_repetitions": len(timings_cpu[0]),
                    "nr_of_repetitions_other": len(timings_cpu[1]),
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = first.get("run_type", "")
            aggregate_name = (
                first["aggregate_name"]
                if run_type == "aggregate" and "aggregate_name" in first
                else ""
            )
            diff_report.append(
                {
                    "name": benchmark_name,
                    "label": label,
                    "measurements": measurements,
                    "time_unit": time_unit,
                    "run_type": run_type,
                    "aggregate_name": aggregate_name,
                    "utest": utest_results,
                }
            )

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    # .any() is False both for an empty array and for all-zero geomeans.
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append(
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": lhs_gmean[0],
                        "cpu_time": lhs_gmean[1],
                        "real_time_other": rhs_gmean[0],
                        "cpu_time_other": rhs_gmean[1],
                        "time": calculate_change(lhs_gmean[0], rhs_gmean[0]),
                        "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]),
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            }
        )

    return diff_report
""" assert utest is True or utest is False def get_color(res): if res > 0.05: return BC_FAIL elif res > -0.07: return BC_WHITE else: return BC_CYAN first_col_width = find_longest_name(json_diff_report) first_col_width = max(first_col_width, len("Benchmark")) first_col_width += len(UTEST_COL_NAME) fmt_str = ( "{:<{}s}Time CPU Time Old Time New CPU Old" " CPU New" ) first_line = fmt_str.format("Benchmark", 12 + first_col_width) output_strs = [first_line, "-" * len(first_line)] fmt_str = ( "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}" "{endc}{:14.0f}{:14.0f}" ) for benchmark in json_diff_report: # *If* we were asked to only include aggregates, # and if it is non-aggregate, then don't print it. if ( not include_aggregates_only or "run_type" not in benchmark or benchmark["run_type"] == "aggregate" ): for measurement in benchmark["measurements"]: output_strs += [ color_format( use_color, fmt_str, BC_HEADER, benchmark["name"], first_col_width, get_color(measurement["time"]), measurement["time"], get_color(measurement["cpu"]), measurement["cpu"], measurement["real_time"], measurement["real_time_other"], measurement["cpu_time"], measurement["cpu_time_other"], endc=BC_ENDC, ) ] # After processing the measurements, if requested and # if applicable (e.g. u-test exists for given benchmark), # print the U test. 
if utest and benchmark["utest"]: output_strs += print_utest( benchmark["name"], benchmark["utest"], utest_alpha=utest_alpha, first_col_width=first_col_width, use_color=use_color, ) return output_strs ############################################################################### # Unit tests class TestGetUniqueBenchmarkNames(unittest.TestCase): def load_results(self): import json testInputs = os.path.join( os.path.dirname(os.path.realpath(__file__)), "Inputs" ) testOutput = os.path.join(testInputs, "test3_run0.json") with open(testOutput) as f: json = json.load(f) return json def test_basic(self): expect_lines = [ "BM_One", "BM_Two", "short", # These two are not sorted "medium", # These two are not sorted ] json = self.load_results() output_lines = get_unique_benchmark_names(json) print("\n") print("\n".join(output_lines)) self.assertEqual(len(output_lines), len(expect_lines)) for i, output_line in enumerate(output_lines): self.assertEqual(expect_lines[i], output_line) class TestReportDifference(unittest.TestCase): @classmethod def setUpClass(cls): def load_results(): import json testInputs = os.path.join( os.path.dirname(os.path.realpath(__file__)), "Inputs" ) testOutput1 = os.path.join(testInputs, "test1_run1.json") testOutput2 = os.path.join(testInputs, "test1_run2.json") with open(testOutput1) as f: json1 = json.load(f) with open(testOutput2) as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() cls.json_diff_report = get_difference_report(json1, json2) def test_json_diff_report_pretty_printing(self): expect_lines = [ ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"], ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"], ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"], [ "BM_1PercentFaster", "-0.0100", "-0.0100", "100", "99", "100", "99", ], [ "BM_1PercentSlower", "+0.0100", "+0.0100", "100", "101", "100", "101", ], [ "BM_10PercentFaster", "-0.1000", "-0.1000", "100", "90", "100", "90", ], [ 
"BM_10PercentSlower", "+0.1000", "+0.1000", "100", "110", "100", "110", ], [ "BM_100xSlower", "+99.0000", "+99.0000", "100", "10000", "100", "10000", ], [ "BM_100xFaster", "-0.9900", "-0.9900", "10000", "100", "10000", "100", ], [ "BM_10PercentCPUToTime", "+0.1000", "-0.1000", "100", "110", "100", "90", ], ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"], ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"], ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"], ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( self.json_diff_report, use_color=False ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i, output_line in enumerate(output_lines): parts = [x for x in output_line.split(" ") if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report_output(self): expected_output = [ { "name": "BM_SameTimes", "label": "", "measurements": [ { "time": 0.0000, "cpu": 0.0000, "real_time": 10, "real_time_other": 10, "cpu_time": 10, "cpu_time_other": 10, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_2xFaster", "label": "", "measurements": [ { "time": -0.5000, "cpu": -0.5000, "real_time": 50, "real_time_other": 25, "cpu_time": 50, "cpu_time_other": 25, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_2xSlower", "label": "", "measurements": [ { "time": 1.0000, "cpu": 1.0000, "real_time": 50, "real_time_other": 100, "cpu_time": 50, "cpu_time_other": 100, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_1PercentFaster", "label": "", "measurements": [ { "time": -0.0100, "cpu": -0.0100, "real_time": 100, "real_time_other": 98.9999999, "cpu_time": 100, "cpu_time_other": 98.9999999, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_1PercentSlower", "label": "", "measurements": [ { "time": 0.0100, "cpu": 0.0100, 
"real_time": 100, "real_time_other": 101, "cpu_time": 100, "cpu_time_other": 101, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_10PercentFaster", "label": "", "measurements": [ { "time": -0.1000, "cpu": -0.1000, "real_time": 100, "real_time_other": 90, "cpu_time": 100, "cpu_time_other": 90, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_10PercentSlower", "label": "", "measurements": [ { "time": 0.1000, "cpu": 0.1000, "real_time": 100, "real_time_other": 110, "cpu_time": 100, "cpu_time_other": 110, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_100xSlower", "label": "", "measurements": [ { "time": 99.0000, "cpu": 99.0000, "real_time": 100, "real_time_other": 10000, "cpu_time": 100, "cpu_time_other": 10000, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_100xFaster", "label": "", "measurements": [ { "time": -0.9900, "cpu": -0.9900, "real_time": 10000, "real_time_other": 100, "cpu_time": 10000, "cpu_time_other": 100, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_10PercentCPUToTime", "label": "", "measurements": [ { "time": 0.1000, "cpu": -0.1000, "real_time": 100, "real_time_other": 110, "cpu_time": 100, "cpu_time_other": 90, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_ThirdFaster", "label": "", "measurements": [ { "time": -0.3333, "cpu": -0.3334, "real_time": 100, "real_time_other": 67, "cpu_time": 100, "cpu_time_other": 67, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_NotBadTimeUnit", "label": "", "measurements": [ { "time": -0.9000, "cpu": 0.2000, "real_time": 0.4, "real_time_other": 0.04, "cpu_time": 0.5, "cpu_time_other": 0.6, } ], "time_unit": "s", "utest": {}, }, { "name": "BM_hasLabel", "label": "a label", "measurements": [ { "time": 0.0000, "cpu": 0.0000, "real_time": 1, "real_time_other": 1, "cpu_time": 1, "cpu_time_other": 1, } ], "time_unit": "s", "utest": {}, }, { "name": "OVERALL_GEOMEAN", "label": "", "measurements": [ { "real_time": 3.1622776601683826e-06, "cpu_time": 3.2130844755623912e-06, 
"real_time_other": 1.9768988699420897e-07, "cpu_time_other": 2.397447755209533e-07, "time": -0.8112976497120911, "cpu": -0.7778551721181174, } ], "time_unit": "s", "run_type": "aggregate", "aggregate_name": "geomean", "utest": {}, }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) for out, expected in zip( self.json_diff_report, expected_output, strict=True ): self.assertEqual(out["name"], expected["name"]) self.assertEqual(out["label"], expected["label"]) self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) class TestReportDifferenceBetweenFamilies(unittest.TestCase): @classmethod def setUpClass(cls): def load_result(): import json testInputs = os.path.join( os.path.dirname(os.path.realpath(__file__)), "Inputs" ) testOutput = os.path.join(testInputs, "test2_run.json") with open(testOutput) as f: json = json.load(f) return json json = load_result() json1 = filter_benchmark(json, "BM_Z.ro", ".") json2 = filter_benchmark(json, "BM_O.e", ".") cls.json_diff_report = get_difference_report(json1, json2) def test_json_diff_report_pretty_printing(self): expect_lines = [ [".", "-0.5000", "-0.5000", "10", "5", "10", "5"], ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"], ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"], ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"], ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( self.json_diff_report, use_color=False ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i, output_line in enumerate(output_lines): parts = [x for x in output_line.split(" ") if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { "name": ".", "measurements": [ { "time": -0.5, "cpu": 
-0.5, "real_time": 10, "real_time_other": 5, "cpu_time": 10, "cpu_time_other": 5, } ], "time_unit": "ns", "utest": {}, }, { "name": "./4", "measurements": [ { "time": -0.5, "cpu": -0.5, "real_time": 40, "real_time_other": 20, "cpu_time": 40, "cpu_time_other": 20, } ], "time_unit": "ns", "utest": {}, }, { "name": "Prefix/.", "measurements": [ { "time": -0.5, "cpu": -0.5, "real_time": 20, "real_time_other": 10, "cpu_time": 20, "cpu_time_other": 10, } ], "time_unit": "ns", "utest": {}, }, { "name": "Prefix/./3", "measurements": [ { "time": -0.5, "cpu": -0.5, "real_time": 30, "real_time_other": 15, "cpu_time": 30, "cpu_time_other": 15, } ], "time_unit": "ns", "utest": {}, }, { "name": "OVERALL_GEOMEAN", "measurements": [ { "real_time": 2.213363839400641e-08, "cpu_time": 2.213363839400641e-08, "real_time_other": 1.1066819197003185e-08, "cpu_time_other": 1.1066819197003185e-08, "time": -0.5000000000000009, "cpu": -0.5000000000000009, } ], "time_unit": "s", "run_type": "aggregate", "aggregate_name": "geomean", "utest": {}, }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) for out, expected in zip( self.json_diff_report, expected_output, strict=True ): self.assertEqual(out["name"], expected["name"]) self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) class TestReportDifferenceWithUTest(unittest.TestCase): @classmethod def setUpClass(cls): def load_results(): import json testInputs = os.path.join( os.path.dirname(os.path.realpath(__file__)), "Inputs" ) testOutput1 = os.path.join(testInputs, "test3_run0.json") testOutput2 = os.path.join(testInputs, "test3_run1.json") with open(testOutput1) as f: json1 = json.load(f) with open(testOutput2) as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ ["BM_One", "-0.1000", 
"+0.1000", "10", "9", "100", "110"], ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], [ "BM_Two_pvalue", "1.0000", "0.6667", "U", "Test,", "Repetitions:", "2", "vs", "2.", "WARNING:", "Results", "unreliable!", "9+", "repetitions", "recommended.", ], ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], [ "short_pvalue", "0.7671", "0.2000", "U", "Test,", "Repetitions:", "2", "vs", "3.", "WARNING:", "Results", "unreliable!", "9+", "repetitions", "recommended.", ], ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i, output_line in enumerate(output_lines): parts = [x for x in output_line.split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report_pretty_printing_aggregates_only(self): expect_lines = [ ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], [ "BM_Two_pvalue", "1.0000", "0.6667", "U", "Test,", "Repetitions:", "2", "vs", "2.", "WARNING:", "Results", "unreliable!", "9+", "repetitions", "recommended.", ], ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], [ "short_pvalue", "0.7671", "0.2000", "U", "Test,", "Repetitions:", "2", "vs", "3.", "WARNING:", "Results", "unreliable!", "9+", "repetitions", "recommended.", ], ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False, ) output_lines = output_lines_with_header[2:] print("\n") 
print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i, output_line in enumerate(output_lines): parts = [x for x in output_line.split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { "name": "BM_One", "measurements": [ { "time": -0.1, "cpu": 0.1, "real_time": 10, "real_time_other": 9, "cpu_time": 100, "cpu_time_other": 110, } ], "time_unit": "ns", "utest": {}, }, { "name": "BM_Two", "measurements": [ { "time": 0.1111111111111111, "cpu": -0.011111111111111112, "real_time": 9, "real_time_other": 10, "cpu_time": 90, "cpu_time_other": 89, }, { "time": -0.125, "cpu": -0.16279069767441862, "real_time": 8, "real_time_other": 7, "cpu_time": 86, "cpu_time_other": 72, }, ], "time_unit": "ns", "utest": { "have_optimal_repetitions": False, "cpu_pvalue": 0.6666666666666666, "time_pvalue": 1.0, }, }, { "name": "short", "measurements": [ { "time": -0.125, "cpu": -0.0625, "real_time": 8, "real_time_other": 7, "cpu_time": 80, "cpu_time_other": 75, }, { "time": -0.4325, "cpu": -0.13506493506493514, "real_time": 8, "real_time_other": 4.54, "cpu_time": 77, "cpu_time_other": 66.6, }, ], "time_unit": "ns", "utest": { "have_optimal_repetitions": False, "cpu_pvalue": 0.2, "time_pvalue": 0.7670968684102772, }, }, { "name": "medium", "measurements": [ { "time": -0.375, "cpu": -0.3375, "real_time": 8, "real_time_other": 5, "cpu_time": 80, "cpu_time_other": 53, } ], "time_unit": "ns", "utest": {}, }, { "name": "OVERALL_GEOMEAN", "measurements": [ { "real_time": 8.48528137423858e-09, "cpu_time": 8.441336246629233e-08, "real_time_other": 2.2405267593145244e-08, "cpu_time_other": 2.5453661413660466e-08, "time": 1.6404861082353634, "cpu": -0.6984640740519662, } ], "time_unit": "s", "run_type": "aggregate", "aggregate_name": "geomean", "utest": {}, }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) for out, expected in zip( self.json_diff_report, expected_output, 
class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
    unittest.TestCase
):
    """
    U-test report for the test3_run0.json / test3_run1.json fixture pair.

    NOTE(review): despite the class name, print_difference_report is called
    here without include_aggregates_only=True - confirm that is intended.
    """

    @classmethod
    def setUpClass(cls):
        import json

        inputs_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "Inputs"
        )
        runs = []
        for fixture_name in ("test3_run0.json", "test3_run1.json"):
            with open(os.path.join(inputs_dir, fixture_name)) as fixture:
                runs.append(json.load(fixture))
        cls.json_diff_report = get_difference_report(
            runs[0], runs[1], utest=True
        )

    def test_json_diff_report_pretty_printing(self):
        """Measurement rows and U-test rows print in the expected order."""
        # Tail shared by both U-test rows: too few repetitions were run.
        warning_tail = [
            "WARNING:",
            "Results",
            "unreliable!",
            "9+",
            "repetitions",
            "recommended.",
        ]
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                *warning_tail,
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                *warning_tail,
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        report_lines = print_difference_report(
            self.json_diff_report,
            utest=True,
            utest_alpha=0.05,
            use_color=False,
        )
        # The first two lines are the header and its separator.
        data_lines = report_lines[2:]
        print("\n")
        print("\n".join(report_lines))
        self.assertEqual(len(data_lines), len(expect_lines))
        for wanted, line in zip(expect_lines, data_lines, strict=True):
            columns = [token for token in line.split(" ") if token]
            self.assertEqual(wanted, columns)

    def test_json_diff_report(self):
        """The structured report matches the expected numbers and p-values."""
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "time": -0.375,
                        "real_time": 8,
                        "cpu_time_other": 53,
                        "cpu": -0.3375,
                    }
                ],
                "utest": {},
                "time_unit": "ns",
                "aggregate_name": "",
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for actual, wanted in zip(
            self.json_diff_report, expected_output, strict=True
        ):
            self.assertEqual(actual["name"], wanted["name"])
            self.assertEqual(actual["time_unit"], wanted["time_unit"])
            assert_utest(self, actual, wanted)
            assert_measurements(self, actual, wanted)
class TestReportSorting(unittest.TestCase):
    """util.sort_benchmark_results restores one fixed order after shuffling."""

    @classmethod
    def setUpClass(cls):
        import json

        fixture_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "Inputs",
            "test4_run.json",
        )
        with open(fixture_path) as fixture:
            cls.json = json.load(fixture)

    def test_json_diff_report_pretty_printing(self):
        """Shuffle repeatedly; every sort must give the expected names."""
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]

        # One round per ordered pair of benchmarks in the fixture.
        rounds = len(self.json["benchmarks"]) ** 2
        for _round in range(rounds):
            random.shuffle(self.json["benchmarks"])
            result = util.sort_benchmark_results(self.json)
            sorted_benchmarks = result["benchmarks"]
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for benchmark, wanted_name in zip(
                sorted_benchmarks, expected_names, strict=True
            ):
                self.assertEqual(benchmark["name"], wanted_name)
def assert_utest(unittest_instance, lhs, rhs):
    """
    Check that the "utest" entries of two report items agree.

    An empty `lhs["utest"]` must equal `rhs["utest"]` exactly. Otherwise the
    two p-values are compared with assertAlmostEqual and the
    "have_optimal_repetitions" flag with assertEqual.
    """
    actual = lhs["utest"]
    wanted = rhs["utest"]

    if not actual:
        # lhs is empty. assert if rhs is not.
        unittest_instance.assertEqual(actual, wanted)
        return

    for pvalue_key in ("cpu_pvalue", "time_pvalue"):
        unittest_instance.assertAlmostEqual(
            actual[pvalue_key], wanted[pvalue_key]
        )
    unittest_instance.assertEqual(
        actual["have_optimal_repetitions"],
        wanted["have_optimal_repetitions"],
    )
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2

# Number of leading bytes read to recognise an executable: the EXE magic
# ("MZ") is two bytes long, the Mach-O and ELF magics are four.
_num_magic_bytes = 2 if sys.platform.startswith("win") else 4


def is_executable_file(filename):
    """
    Return 'True' if 'filename' names a valid file which is likely
    an executable. A file is considered an executable if it starts with the
    magic bytes for a EXE, Mach O, or ELF file.

    Only the magic for the platform the script runs on is accepted.
    """
    if not os.path.isfile(filename):
        return False
    with open(filename, mode="rb") as f:
        magic_bytes = f.read(_num_magic_bytes)
    if sys.platform == "darwin":
        return magic_bytes in [
            b"\xfe\xed\xfa\xce",  # MH_MAGIC
            b"\xce\xfa\xed\xfe",  # MH_CIGAM
            b"\xfe\xed\xfa\xcf",  # MH_MAGIC_64
            b"\xcf\xfa\xed\xfe",  # MH_CIGAM_64
            b"\xca\xfe\xba\xbe",  # FAT_MAGIC
            b"\xbe\xba\xfe\xca",  # FAT_CIGAM
        ]
    if sys.platform.startswith("win"):
        return magic_bytes == b"MZ"
    return magic_bytes == b"\x7fELF"


def is_json_file(filename):
    """
    Returns 'True' if 'filename' names a valid JSON output file.
    'False' otherwise.
    """
    try:
        with open(filename) as f:
            json.load(f)
    except Exception:
        # Deliberately broad: any failure to open or parse means "not JSON".
        # Unlike BaseException, this lets KeyboardInterrupt and SystemExit
        # propagate instead of being reported as an invalid file.
        return False
    return True


def classify_input_file(filename):
    """
    Return a tuple (type, msg) where 'type' specifies the classified type
    of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
    string representing the error; otherwise 'msg' is None.
    """
    ftype = IT_Invalid
    err_msg = None
    if not os.path.exists(filename):
        err_msg = f"'{filename}' does not exist"
    elif not os.path.isfile(filename):
        err_msg = f"'{filename}' does not name a file"
    elif is_executable_file(filename):
        ftype = IT_Executable
    elif is_json_file(filename):
        ftype = IT_JSON
    else:
        err_msg = (
            f"'{filename}' does not name a valid benchmark executable or"
            " JSON file"
        )
    return ftype, err_msg


def check_input_file(filename):
    """
    Classify the file named by 'filename' and return the classification.
    If the file is classified as 'IT_Invalid' print an error message and exit
    the program with status 1.
    """
    ftype, msg = classify_input_file(filename)
    if ftype == IT_Invalid:
        print(f"Invalid input file: {msg}")
        sys.exit(1)
    return ftype


def find_benchmark_flag(prefix, benchmark_flags):
    """
    Search the specified list of flags for a flag of the form `<prefix><arg>`
    and if it is found return the arg it specifies. If specified more than
    once the last value is returned. If the flag is not found None is
    returned.

    'prefix' must start with "--" and end with "=".
    """
    assert prefix.startswith("--")
    assert prefix.endswith("=")
    result = None
    for f in benchmark_flags:
        if f.startswith(prefix):
            # Keep scanning: a later occurrence overrides an earlier one.
            result = f[len(prefix) :]
    return result


def remove_benchmark_flags(prefix, benchmark_flags):
    """
    Return a new list containing the specified benchmark_flags except those
    with the specified prefix.

    'prefix' must start with "--" and end with "=".
    """
    assert prefix.startswith("--")
    assert prefix.endswith("=")
    return [f for f in benchmark_flags if not f.startswith(prefix)]


def load_benchmark_results(fname, benchmark_filter):
    """
    Read benchmark output from a file and return the JSON object.

    Apply benchmark_filter, a regular expression, with nearly the same
    semantics of the --benchmark_filter argument.  May be None.
    Note: the Python regular expression engine is used instead of the
    one used by the C++ code, which may produce different results
    in complex cases.

    Exits the program with status 1 if the file declares a JSON schema
    version other than 1.

    REQUIRES: 'fname' names a file containing JSON benchmark output.
    """

    def benchmark_wanted(benchmark):
        if benchmark_filter is None:
            return True
        # Prefer "run_name" when it is present and non-empty.
        name = benchmark.get("run_name", None) or benchmark["name"]
        return re.search(benchmark_filter, name) is not None

    with open(fname) as f:
        results = json.load(f)
    if "json_schema_version" in results.get("context", {}):
        json_schema_version = results["context"]["json_schema_version"]
        if json_schema_version != 1:
            print(
                f"In {fname}, got unsupported JSON schema version:"
                f" {json_schema_version}, expected 1"
            )
            sys.exit(1)
    if "benchmarks" in results:
        results["benchmarks"] = list(
            filter(benchmark_wanted, results["benchmarks"])
        )
    return results


def sort_benchmark_results(result):
    """
    Sort result["benchmarks"] in place and return 'result'.

    Benchmarks are ordered by family index, then per-family instance index,
    then non-aggregates before aggregates, then repetition index. A missing
    index counts as -1. Entries that compare equal keep their relative
    order.
    """

    def sort_key(benchmark):
        # Outermost key first; one composite key gives the same order as
        # stable-sorting by each key in turn from the innermost outwards.
        return (
            benchmark.get("family_index", -1),
            benchmark.get("per_family_instance_index", -1),
            1 if benchmark.get("run_type") == "aggregate" else 0,
            benchmark.get("repetition_index", -1),
        )

    result["benchmarks"] = sorted(result["benchmarks"], key=sort_key)
    return result


def run_benchmark(exe_name, benchmark_flags):
    """
    Run a benchmark specified by 'exe_name' with the specified
    'benchmark_flags'. The benchmark is run directly as a subprocess to
    preserve real time console output.

    If the flags carry no --benchmark_out=, a temporary output file is used
    and removed again before returning, including when the benchmark fails.
    A non-zero exit status of the benchmark terminates the program with the
    same status.

    RETURNS: A JSON object representing the benchmark output
    """
    output_name = find_benchmark_flag("--benchmark_out=", benchmark_flags)
    is_temp_output = output_name is None
    if is_temp_output:
        thandle, output_name = tempfile.mkstemp()
        os.close(thandle)
        benchmark_flags = [
            *list(benchmark_flags),
            "--benchmark_out=%s" % output_name,
        ]

    try:
        cmd = [exe_name, *benchmark_flags]
        print("RUNNING: %s" % " ".join(cmd))
        exit_code = subprocess.call(cmd)
        if exit_code != 0:
            print("TEST FAILED...")
            sys.exit(exit_code)
        return load_benchmark_results(output_name, None)
    finally:
        if is_temp_output:
            try:
                os.unlink(output_name)
            except OSError:
                # Best-effort cleanup; never mask the real outcome.
                pass


def run_or_load_benchmark(filename, benchmark_flags):
    """
    Get the results for a specified benchmark. If 'filename' specifies
    an executable benchmark then the results are generated by running the
    benchmark. Otherwise 'filename' must name a valid JSON output file,
    which is loaded and the result returned.

    For a JSON file, the value of --benchmark_filter= in 'benchmark_flags'
    (if any) is applied while loading.
    """
    ftype = check_input_file(filename)
    if ftype == IT_JSON:
        benchmark_filter = find_benchmark_flag(
            "--benchmark_filter=", benchmark_flags
        )
        return load_benchmark_results(filename, benchmark_filter)
    if ftype == IT_Executable:
        return run_benchmark(filename, benchmark_flags)
    raise ValueError(f"Unknown file type {ftype}")
MIPS_SRCS_LINUX = [ "lib/pfmlib_mips_perf_event.c", ] POWERPC_SRCS_COMMON = [ "lib/pfmlib_powerpc.c", "lib/pfmlib_power4.c", "lib/pfmlib_ppc970.c", "lib/pfmlib_power5.c", "lib/pfmlib_power6.c", "lib/pfmlib_power7.c", "lib/pfmlib_torrent.c", "lib/pfmlib_power8.c", "lib/pfmlib_power9.c", "lib/pfmlib_powerpc_nest.c", ] POWERPC_SRCS_LINUX = [ "lib/pfmlib_powerpc_perf_event.c", ] S390X_SRCS_COMMON = [ "lib/pfmlib_s390x_cpumf.c", ] S390X_SRCS_LINUX = [ "lib/pfmlib_s390x_perf_event.c", ] X86_64_SRCS_COMMON = [ "lib/pfmlib_amd64.c", "lib/pfmlib_intel_core.c", "lib/pfmlib_intel_x86.c", "lib/pfmlib_intel_x86_arch.c", "lib/pfmlib_intel_atom.c", "lib/pfmlib_intel_nhm_unc.c", "lib/pfmlib_intel_nhm.c", "lib/pfmlib_intel_wsm.c", "lib/pfmlib_intel_snb.c", "lib/pfmlib_intel_snb_unc.c", "lib/pfmlib_intel_ivb.c", "lib/pfmlib_intel_ivb_unc.c", "lib/pfmlib_intel_hsw.c", "lib/pfmlib_intel_bdw.c", "lib/pfmlib_intel_skl.c", "lib/pfmlib_intel_icl.c", "lib/pfmlib_intel_rapl.c", "lib/pfmlib_intel_snbep_unc.c", "lib/pfmlib_intel_snbep_unc_cbo.c", "lib/pfmlib_intel_snbep_unc_ha.c", "lib/pfmlib_intel_snbep_unc_imc.c", "lib/pfmlib_intel_snbep_unc_pcu.c", "lib/pfmlib_intel_snbep_unc_qpi.c", "lib/pfmlib_intel_snbep_unc_ubo.c", "lib/pfmlib_intel_snbep_unc_r2pcie.c", "lib/pfmlib_intel_snbep_unc_r3qpi.c", "lib/pfmlib_intel_ivbep_unc_cbo.c", "lib/pfmlib_intel_ivbep_unc_ha.c", "lib/pfmlib_intel_ivbep_unc_imc.c", "lib/pfmlib_intel_ivbep_unc_pcu.c", "lib/pfmlib_intel_ivbep_unc_qpi.c", "lib/pfmlib_intel_ivbep_unc_ubo.c", "lib/pfmlib_intel_ivbep_unc_r2pcie.c", "lib/pfmlib_intel_ivbep_unc_r3qpi.c", "lib/pfmlib_intel_ivbep_unc_irp.c", "lib/pfmlib_intel_hswep_unc_cbo.c", "lib/pfmlib_intel_hswep_unc_ha.c", "lib/pfmlib_intel_hswep_unc_imc.c", "lib/pfmlib_intel_hswep_unc_pcu.c", "lib/pfmlib_intel_hswep_unc_qpi.c", "lib/pfmlib_intel_hswep_unc_ubo.c", "lib/pfmlib_intel_hswep_unc_r2pcie.c", "lib/pfmlib_intel_hswep_unc_r3qpi.c", "lib/pfmlib_intel_hswep_unc_irp.c", "lib/pfmlib_intel_hswep_unc_sbo.c", 
"lib/pfmlib_intel_bdx_unc_cbo.c", "lib/pfmlib_intel_bdx_unc_ubo.c", "lib/pfmlib_intel_bdx_unc_sbo.c", "lib/pfmlib_intel_bdx_unc_ha.c", "lib/pfmlib_intel_bdx_unc_imc.c", "lib/pfmlib_intel_bdx_unc_irp.c", "lib/pfmlib_intel_bdx_unc_pcu.c", "lib/pfmlib_intel_bdx_unc_qpi.c", "lib/pfmlib_intel_bdx_unc_r2pcie.c", "lib/pfmlib_intel_bdx_unc_r3qpi.c", "lib/pfmlib_intel_skx_unc_cha.c", "lib/pfmlib_intel_skx_unc_iio.c", "lib/pfmlib_intel_skx_unc_imc.c", "lib/pfmlib_intel_skx_unc_irp.c", "lib/pfmlib_intel_skx_unc_m2m.c", "lib/pfmlib_intel_skx_unc_m3upi.c", "lib/pfmlib_intel_skx_unc_pcu.c", "lib/pfmlib_intel_skx_unc_ubo.c", "lib/pfmlib_intel_skx_unc_upi.c", "lib/pfmlib_intel_knc.c", "lib/pfmlib_intel_slm.c", "lib/pfmlib_intel_tmt.c", "lib/pfmlib_intel_knl.c", "lib/pfmlib_intel_knl_unc_imc.c", "lib/pfmlib_intel_knl_unc_edc.c", "lib/pfmlib_intel_knl_unc_cha.c", "lib/pfmlib_intel_knl_unc_m2pcie.c", "lib/pfmlib_intel_glm.c", "lib/pfmlib_intel_netburst.c", "lib/pfmlib_amd64_k7.c", "lib/pfmlib_amd64_k8.c", "lib/pfmlib_amd64_fam10h.c", "lib/pfmlib_amd64_fam11h.c", "lib/pfmlib_amd64_fam12h.c", "lib/pfmlib_amd64_fam14h.c", "lib/pfmlib_amd64_fam15h.c", "lib/pfmlib_amd64_fam17h.c", "lib/pfmlib_amd64_fam16h.c", ] X86_SRCS_COMMON = X86_64_SRCS_COMMON + [ "lib/pfmlib_intel_coreduo.c", "lib/pfmlib_intel_p6.c", ] filegroup( name = "cpu_srcs", srcs = select({ "@platforms//cpu:x86_32": X86_SRCS_COMMON, "@platforms//cpu:x86_64": X86_64_SRCS_COMMON, "@platforms//cpu:aarch32": AARCH32_SRCS_COMMON, "@platforms//cpu:aarch64": AARCH64_SRCS_COMMON, "@platforms//cpu:mips64": MIPS_SRCS_COMMON, "@platforms//cpu:ppc32": POWERPC_SRCS_COMMON, "@platforms//cpu:ppc64le": POWERPC_SRCS_COMMON, "@platforms//cpu:ppc": POWERPC_SRCS_COMMON, "@platforms//cpu:s390x": S390X_SRCS_COMMON, "//conditions:default": [], }), ) filegroup( name = "linux_srcs", srcs = select({ "@platforms//cpu:aarch32": AARCH32_SRCS_LINUX, "@platforms//cpu:aarch64": AARCH64_SRCS_LINUX, "@platforms//cpu:mips64": MIPS_SRCS_LINUX, 
"@platforms//cpu:ppc32": POWERPC_SRCS_LINUX, "@platforms//cpu:ppc64le": POWERPC_SRCS_LINUX, "@platforms//cpu:ppc": POWERPC_SRCS_LINUX, "@platforms//cpu:s390x": S390X_SRCS_LINUX, "//conditions:default": [], }), ) filegroup( name = "srcs", srcs = [ "lib/pfmlib_common.c", "lib/pfmlib_perf_event.c", "lib/pfmlib_perf_event_pmu.c", "lib/pfmlib_perf_event_priv.h", "lib/pfmlib_perf_event_raw.c", "lib/pfmlib_torrent.c", "lib/pfmlib_tx2_unc_perf_event.c", ":cpu_srcs", ] + select({ "@platforms//os:linux": [":linux_srcs"], "//conditions:default": [], }), ) cc_library( name = "pfm", srcs = [ ":srcs", ], hdrs = glob([ "include/perfmon/*.h", ]), copts = [ "-Wno-format-truncation", "-Wno-use-after-free", "-fPIC", "-D_REENTRANT", "-fvisibility=hidden", ] + select({ "@platforms//cpu:aarch32": ["-DCONFIG_PFMLIB_ARCH_ARM"], "@platforms//cpu:aarch64": ["-DCONFIG_PFMLIB_ARCH_ARM64"], "@platforms//cpu:mips64": ["-DCONFIG_PFMLIB_ARCH_MIPS"], "@platforms//cpu:ppc32": ["-DCONFIG_PFMLIB_ARCH_POWERPC"], "@platforms//cpu:ppc64le": ["-DCONFIG_PFMLIB_ARCH_POWERPC"], "@platforms//cpu:ppc": ["-DCONFIG_PFMLIB_ARCH_POWERPC"], "@platforms//cpu:s390x": ["-DCONFIG_PFMLIB_ARCH_S390X"], "//conditions:default": [], }), includes = [ "include", "lib", ], strip_include_prefix = "include", textual_hdrs = glob([ "lib/**/*.h", ]), visibility = [ "//visibility:public", ], ) alias( name = "libpfm", actual = ":pfm", visibility = [ "//visibility:public", ], ) ================================================ FILE: tools/requirements.txt ================================================ numpy == 2.4.3 scipy == 1.17.1 ================================================ FILE: tools/strip_asm.py ================================================ #!/usr/bin/env python3 """ strip_asm.py - Cleanup ASM output for the specified file """ import os import re import sys from argparse import ArgumentParser def find_used_labels(asm): found = set() label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") for line in 
def find_used_labels(asm):
    """
    Return the set of local labels, spelled '.L<name>', that are the operand
    of a jump instruction (a mnemonic starting with 'j') in 'asm'.
    """
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (label_re.match(line) for line in asm.splitlines())
    return {".L%s" % m.group(1) for m in matches if m}


def normalize_labels(asm):
    """
    Give local labels in 'asm' a leading '.' when the input declares them
    without one.

    A label declaration is a line that starts with an optional '.', then 'L',
    then an alphanumeric character followed by alphanumerics or underscores,
    and is immediately followed by ':'.

    'asm' is returned unchanged when it declares no labels or when at least
    one declared label already carries the dot. Otherwise every occurrence of
    a declared label that sits at the start of the text or after whitespace,
    and is followed by ':' or whitespace, gets a '.' prefix.
    """
    label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Decide from the whole set rather than from one arbitrary element, so
    # the result does not depend on set iteration order when dotted and
    # undotted declarations are mixed.
    if not decls or any(ld.startswith(".") for ld in decls):
        return asm

    for ld in decls:
        asm = re.sub(
            r"(^|\s+)" + re.escape(ld) + r"(?=:|\s)", "\\1." + ld, asm
        )
    return asm


def transform_labels(asm):
    """
    Normalize the labels in 'asm' and drop the declaration line of every
    '.L' label that no jump instruction refers to.

    Every kept line is terminated with a newline in the returned text.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line + "\n")
    return "".join(kept)


def is_identifier(tk):
    """
    Return True if 'tk' is non-empty, starts with a letter or '_', and
    otherwise consists only of alphanumeric characters and '_'.
    """
    if not tk:
        return False
    first = tk[0]
    if not (first.isalpha() or first == "_"):
        return False
    return all(c.isalnum() or c == "_" for c in tk[1:])


def process_identifiers(line):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    # The capturing group makes re.split keep the identifier-like tokens, so
    # joining the parts again reproduces the line.
    parts = re.split(r"([a-zA-Z0-9_]+)", line)
    pieces = []
    for tk in parts:
        if is_identifier(tk) and (
            tk.startswith("__Z")
            or (
                tk.startswith("_")
                and len(tk) > 1
                and tk[1].isalpha()
                and tk[1] != "Z"
            )
        ):
            # Drop exactly one leading underscore; tokens starting with '_Z'
            # are left as they are.
            tk = tk[1:]
        pieces.append(tk)
    return "".join(pieces)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Labels are first passed through transform_labels. Each remaining line has
    '@GOTPCREL' removed, is dropped if it matches one of the discard patterns
    (unless a keep pattern matches it), and otherwise has its identifiers
    normalized. A blank line is inserted before every function label except
    when nothing has been emitted yet.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(
            r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
        ),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
        ),
    ]
    # A match here overrides a discard match; the list is currently empty.
    keep_regexes: list[re.Pattern] = []
    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

    out = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace("@GOTPCREL", "")
        discard = any(reg.match(line) is not None for reg in discard_regexes)
        keep = any(reg.match(line) is not None for reg in keep_regexes)
        if discard and not keep:
            continue
        # The function-label check uses the line before identifier
        # normalization. 'out' is non-empty exactly when something has
        # already been emitted.
        if fn_label_def.match(line) and out:
            out.append("\n")
        out.append(process_identifiers(line) + "\n")
    return "".join(out)


def main():
    """
    Command line entry point: read the input assembly file, strip it with
    process_asm and write the result to the output file.

    Prints an error and exits with status 1 if the input file does not exist.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists.
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print("ERROR: input file '%s' does not exist" % input_path)
        sys.exit(1)

    with open(input_path) as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, "w") as f:
        f.write(new_contents)


if __name__ == "__main__":
    main()
tab-indents: off; # kate: indent-mode python; remove-trailing-spaces modified;