Repository: eth-cscs/COSMA
Branch: master
Commit: 2df0432ce9b2
Files: 155
Total size: 2.0 MB
Directory structure:
gitextract_zu2ke5gg/
├── .clang-format
├── .gitattributes
├── .github/
│ ├── tag-issue.md
│ └── workflows/
│ └── version_checker.yml
├── .gitignore
├── .gitlab-ci.yml
├── .gitmodules
├── ATTRIBUTIONS.md
├── CMakeLists.txt
├── INSTALL.md
├── LICENSE
├── README.md
├── _config.yml
├── benchmarks/
│ ├── CMakeLists.txt
│ ├── allgather-volume.cpp
│ ├── bcast-volume.cpp
│ ├── blocking_vs_non_blocking.cpp
│ ├── dgemm_perf_model.cpp
│ ├── gpu_gemm_cublas.cpp
│ ├── gpu_gemm_libsci_acc.cpp
│ ├── reduce-scatter.cpp
│ ├── run_ubench.sh
│ ├── scalapack_transformer.cpp
│ ├── sendrecv.cpp
│ ├── transpose.cpp
│ └── ubench-allgather.cpp
├── bors.toml
├── ci/
│ ├── baseimage.cuda.Dockerfile
│ ├── build.Dockerfile
│ ├── cscs.yml
│ └── mps-wrapper.sh
├── cmake/
│ ├── FindARMPL.cmake
│ ├── FindATLAS.cmake
│ ├── FindBLIS.cmake
│ ├── FindBlas.cmake
│ ├── FindCRAY_LIBSCI.cmake
│ ├── FindFLEXIBLAS.cmake
│ ├── FindGenericBLAS.cmake
│ ├── FindMKL.cmake
│ ├── FindNCCL.cmake
│ ├── FindNVPL.cmake
│ ├── FindOPENBLAS.cmake
│ ├── FindSCALAPACK.cmake
│ ├── GitSubmodule.cmake
│ ├── adjust_mpiexec_flags.cmake
│ ├── build_type.cmake
│ ├── cosma.pc.in
│ ├── cosmaConfig.cmake.in
│ └── find_cuda_version.cmake
├── docker/
│ ├── asan/
│ │ ├── build-env.Dockerfile
│ │ └── deploy.Dockerfile
│ ├── cpu-release/
│ │ ├── build-env.Dockerfile
│ │ └── deploy.Dockerfile
│ └── gpu/
│ ├── build-env.Dockerfile
│ └── deploy.Dockerfile
├── libs/
│ └── gtest_mpi/
│ ├── CMakeLists.txt
│ ├── LICENSE
│ ├── README.md
│ ├── external/
│ │ └── gtest/
│ │ ├── CMakeLists.txt
│ │ ├── include/
│ │ │ └── gtest/
│ │ │ └── gtest.h
│ │ └── src/
│ │ └── gtest-all.cpp
│ └── include/
│ └── gtest_mpi/
│ ├── gtest_mpi.hpp
│ └── gtest_mpi_internal.hpp
├── miniapp/
│ ├── CMakeLists.txt
│ ├── cosma_miniapp.cpp
│ ├── cosma_statistics.cpp
│ ├── layout_miniapp.cpp
│ └── pxgemm_miniapp.cpp
├── scripts/
│ ├── build.sh
│ ├── daint-mc_env.sh
│ ├── install_dependencies.py
│ ├── piz_daint_cpu.sh
│ ├── piz_daint_gpu.sh
│ ├── piz_daint_gpu_aware_mpi.sh
│ ├── run_gpu.sh
│ ├── schedule_miniapp_on_daint_cpu.sh
│ ├── schedule_miniapp_on_daint_gpu.sh
│ └── schedule_tests_on_daint.sh
├── spack/
│ └── packages/
│ └── costa/
│ └── package.py
├── spack_repo/
│ └── cosma/
│ ├── packages/
│ │ ├── cosma/
│ │ │ ├── fj-ssl2.patch
│ │ │ └── package.py
│ │ └── tiled-mm/
│ │ └── package.py
│ └── repo.yaml
├── src/
│ └── cosma/
│ ├── CMakeLists.txt
│ ├── aligned_allocator.hpp
│ ├── blacs.hpp
│ ├── blas.cpp
│ ├── blas.hpp
│ ├── buffer.cpp
│ ├── buffer.hpp
│ ├── cinterface.cpp
│ ├── cinterface.hpp
│ ├── communicator.cpp
│ ├── communicator.hpp
│ ├── context.cpp
│ ├── context.hpp
│ ├── cosma_pxgemm.cpp
│ ├── cosma_pxgemm.hpp
│ ├── environment_variables.cpp
│ ├── environment_variables.hpp
│ ├── gpu/
│ │ ├── gpu_aware_mpi_utils.cpp
│ │ ├── gpu_aware_mpi_utils.hpp
│ │ ├── gpu_runtime_api.hpp
│ │ ├── nccl_mapper.hpp
│ │ ├── nccl_utils.cpp
│ │ ├── nccl_utils.hpp
│ │ └── utils.hpp
│ ├── interpose.h
│ ├── interval.cpp
│ ├── interval.hpp
│ ├── layout.cpp
│ ├── layout.hpp
│ ├── local_multiply.cpp
│ ├── local_multiply.hpp
│ ├── mapper.cpp
│ ├── mapper.hpp
│ ├── math_utils.cpp
│ ├── math_utils.hpp
│ ├── matrix.cpp
│ ├── matrix.hpp
│ ├── memory_pool.cpp
│ ├── memory_pool.hpp
│ ├── mpi_mapper.hpp
│ ├── multiply.cpp
│ ├── multiply.hpp
│ ├── one_sided_communicator.cpp
│ ├── one_sided_communicator.hpp
│ ├── pinned_buffers.cpp
│ ├── pinned_buffers.hpp
│ ├── prefixed_pxgemm.cpp
│ ├── prefixed_pxgemm.h
│ ├── profiler.hpp
│ ├── pxgemm.cpp
│ ├── pxgemm.h
│ ├── pxgemm_params.hpp
│ ├── random_generator.hpp
│ ├── scalapack.cpp
│ ├── scalapack.hpp
│ ├── statistics.hpp
│ ├── strategy.cpp
│ ├── strategy.hpp
│ ├── timer.hpp
│ ├── two_sided_communicator.cpp
│ └── two_sided_communicator.hpp
├── tests/
│ ├── CMakeLists.txt
│ ├── main_gtest.cpp
│ ├── main_gtest_mpi.cpp
│ ├── mapper.cpp
│ ├── multiply.cpp
│ ├── multiply_using_layout.cpp
│ ├── pdgemm.cpp
│ └── scalar_matmul.cpp
└── utils/
├── cosma_utils.hpp
├── parse_strategy.hpp
└── pxgemm_utils.hpp
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
---
# Used for all options not set in this file
BasedOnStyle: LLVM
AllowAllParametersOfDeclarationOnNextLine: false
BinPackArguments: false
BinPackParameters: false
BreakConstructorInitializersBeforeComma: true
AlwaysBreakTemplateDeclarations: Yes
IndentWidth: 4
================================================
FILE: .gitattributes
================================================
*.git export-ignore
*.github export-ignore
/ci export-ignore
/docker export-ignore
*.DS_Store export-ignore
*.gitattributes export-ignore
/*.clang-format export-ignore
*.gitignore export-ignore
/_config.yml export-ignore
/bors.toml export-ignore
*.gitmodules export-ignore
/.gitlab-ci.yml export-ignore
================================================
FILE: .github/tag-issue.md
================================================
---
title: cmake project version {{ env.CMAKE_VERSION }} does not match git tag {{ env.GIT_VERSION }}
labels: bug
---
The cmake version should be in sync with the git version to ensure the correct file names and sonames of shared libraries.
================================================
FILE: .github/workflows/version_checker.yml
================================================
# Checks that the CMake project version matches the pushed git tag and, if not,
# opens an issue from the .github/tag-issue.md template.
name: VersionChecker
on:
  push:
    tags:
      - 'v*'
jobs:
  checker:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # full history so `git describe --tags` can see the tags
          fetch-depth: 0
      - name: Version check
        id: check
        run: |
          mkdir build
          cd build
          # configure may fail (missing deps); we only need CMakeCache.txt
          cmake .. || true
          CMAKE_VERSION="v$(grep '^CMAKE_PROJECT_VERSION\b' CMakeCache.txt | cut -d "=" -f2)"
          GIT_VERSION=$(git describe --tags)
          if [ "$CMAKE_VERSION" != "$GIT_VERSION" ]; then
            # "::set-output" is deprecated and disabled by GitHub;
            # step outputs must be written to the GITHUB_OUTPUT file instead.
            echo "CMAKE_ISSUE=yes" >> "$GITHUB_OUTPUT"
            echo "CMAKE_VERSION=$CMAKE_VERSION" >> "$GITHUB_OUTPUT"
            echo "GIT_VERSION=$GIT_VERSION" >> "$GITHUB_OUTPUT"
          fi
      - uses: JasonEtco/create-an-issue@v2.4.0
        if: steps.check.outputs.CMAKE_ISSUE == 'yes'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          CMAKE_VERSION: ${{ steps.check.outputs.CMAKE_VERSION }}
          GIT_VERSION: ${{ steps.check.outputs.GIT_VERSION }}
        with:
          filename: .github/tag-issue.md
================================================
FILE: .gitignore
================================================
.DS_Store*
*.swp
__pycache__
build
exports
doc
.idea*
CMakeLists.txt.user
.vscode*
================================================
FILE: .gitlab-ci.yml
================================================
include:
- remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.cscs.yml'
stages:
- build
- test
##
## BUILDS
##
.build_common:
extends: .dind
stage: build
only: ['master', 'staging', 'trying']
variables:
GIT_SUBMODULE_STRATEGY: recursive
before_script:
- docker login -u $CSCS_REGISTRY_USER -p $CSCS_REGISTRY_PASSWORD $CSCS_REGISTRY
script:
- docker build --network=host --cache-from $BUILD_IMAGE --build-arg BUILDKIT_INLINE_CACHE=1 -t $BUILD_IMAGE -f $BUILD_DOCKERFILE .
- docker push $BUILD_IMAGE
- docker build -t $DEPLOY_IMAGE --network=host --build-arg BUILDKIT_INLINE_CACHE=1 --build-arg BUILD_ENV=$BUILD_IMAGE -f $DEPLOY_DOCKERFILE .
- docker push $DEPLOY_IMAGE
# Builds a Docker image for the current commit, cpu / gpu
build sanitizer cpu:
extends: .build_common
variables:
BUILD_DOCKERFILE: docker/asan/build-env.Dockerfile
BUILD_IMAGE: $CSCS_REGISTRY_IMAGE/build-env-asan:latest
DEPLOY_DOCKERFILE: docker/asan/deploy.Dockerfile
DEPLOY_IMAGE: $CSCS_REGISTRY_IMAGE/deploy-cpu-asan:$CI_COMMIT_SHA
build cpu:
extends: .build_common
variables:
BUILD_DOCKERFILE: docker/cpu-release/build-env.Dockerfile
BUILD_IMAGE: $CSCS_REGISTRY_IMAGE/build-env-cpu:latest
DEPLOY_DOCKERFILE: docker/cpu-release/deploy.Dockerfile
DEPLOY_IMAGE: $CSCS_REGISTRY_IMAGE/deploy-cpu:$CI_COMMIT_SHA
build gpu:
extends: .build_common
variables:
BUILD_DOCKERFILE: docker/gpu/build-env.Dockerfile
BUILD_IMAGE: $CSCS_REGISTRY_IMAGE/build-env-gpu:latest
DEPLOY_DOCKERFILE: docker/gpu/deploy.Dockerfile
DEPLOY_IMAGE: $CSCS_REGISTRY_IMAGE/deploy-gpu:$CI_COMMIT_SHA
sanitize:
stage: test
only: ['master', 'staging', 'trying']
trigger:
strategy: depend
include: /ci/sanitize.yml
cpu test:
stage: test
only: ['master', 'staging', 'trying']
trigger:
strategy: depend
include: /ci/cpu.yml
gpu test:
stage: test
only: ['master', 'staging', 'trying']
trigger:
strategy: depend
include: /ci/gpu.yml
================================================
FILE: .gitmodules
================================================
[submodule "libs/Tiled-MM"]
path = libs/Tiled-MM
url = https://github.com/eth-cscs/Tiled-MM.git
[submodule "libs/COSTA"]
path = libs/COSTA
url = https://github.com/eth-cscs/COSTA
[submodule "libs/cxxopts"]
path = libs/cxxopts
url = https://github.com/jarro2783/cxxopts
================================================
FILE: ATTRIBUTIONS.md
================================================
# COSMA Attributions:
COSMA uses the following external projects:
- [COSTA](https://github.com/eth-cscs/COSTA): used for transforming between COSMA and SCALAPACK matrix data layouts and for transposing distributed matrices. Licensed under the [BSD-3-Clause License](https://github.com/eth-cscs/COSTA/blob/master/LICENSE).
- [Tiled-MM](https://github.com/eth-cscs/Tiled-MM): used for performing `dgemm` calls with the GPU-backend. Licensed under the [BSD-3-Clause License](https://github.com/eth-cscs/Tiled-MM/blob/master/LICENSE).
- [semiprof](https://github.com/bcumming/semiprof): used for profiling the code. Licensed under the [BSD-3-Clause License](https://github.com/bcumming/semiprof/blob/master/LICENSE).
- [options](https://github.com/kabicm/options): used for parsing the command line options. Licensed under the [BSD-3-Clause License](https://github.com/kabicm/options/blob/master/LICENCE).
- [cxxopts](https://github.com/jarro2783/cxxopts): used for parsing the command line options. Licensed under the [MIT License](https://github.com/jarro2783/cxxopts/blob/master/LICENSE).
- [googletest](https://github.com/google/googletest): used for unit testing. Licensed under the [BSD-3-Clause License](https://github.com/google/googletest/blob/master/LICENSE).
- [gtest_mpi](https://github.com/AdhocMan/gtest_mpi): used as a plugin for googletest adding the MPI support. Licensed under the [BSD-3-Clause License](https://github.com/AdhocMan/gtest_mpi/blob/master/LICENSE).
- [interpose](https://github.com/ccurtsinger/interpose): used for dispatching some of the pxgemm calls to SCALAPACK. Licensed under the [MIT License](https://github.com/ccurtsinger/interpose/blob/master/COPYING.md).
Most of these projects are added as submodules and can be found in the `libs` folder.
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
project(cosma
DESCRIPTION "Communication Optimal Matrix Multiplication"
HOMEPAGE_URL "https://github.com/eth-cscs/COSMA"
VERSION 2.8.4
LANGUAGES CXX)
include(FetchContent)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
include(cmake/build_type.cmake)
include(cmake/adjust_mpiexec_flags.cmake)
set(CMAKE_EXPORT_COMPILE_COMMANDS "YES") # always write compile_commands.json
# Options
#
set(COSMA_GPU_BACKENDS_LIST "CUDA" "ROCM")
set(COSMA_SCALAPACK_LIST "OFF" "MKL" "CRAY_LIBSCI" "NVPL" "CUSTOM")
set(COSMA_BLAS_LIST "auto" "MKL" "OPENBLAS" "CRAY_LIBSCI" "NVPL" "CUSTOM" "BLIS" "ATLAS" "CUDA" "ROCM" "OFF")
option(COSMA_WITH_TESTS "Generate the test target." ON)
option(COSMA_WITH_APPS "Generate the miniapp targets." ON)
option(COSMA_WITH_BENCHMARKS "Generate the benchmark targets." ON)
option(COSMA_WITH_PROFILING "Enable profiling." OFF)
option(COSMA_WITH_NCCL "Use NCCL as communication backend." OFF)
option(COSMA_WITH_RCCL "Use RCCL as communication backend." OFF)
option(COSMA_WITH_GPU_AWARE_MPI "Use gpu-aware MPI for communication." OFF)
option(COSMA_USE_UNIFIED_MEMORY "Use unified memory when GPU acceleration is ON" OFF)
option(BUILD_SHARED_LIBS "Build shared libraries." OFF)
set(COSMA_SCALAPACK "OFF" CACHE STRING "scalapack implementation. Can be MKL, CRAY_LIBSCI, NVPL, CUSTOM or OFF.")
set(COSMA_BLAS "OFF" CACHE STRING "Blas library for computations on host or GPU")
set(COSMA_BLAS_VENDOR "OFF")
set(COSMA_GPU_BACKEND "OFF")
set_property(CACHE COSMA_SCALAPACK PROPERTY STRINGS ${COSMA_SCALAPACK_LIST})
set_property(CACHE COSMA_BLAS PROPERTY STRINGS ${COSMA_BLAS_LIST})
# we keep the old cosma behavior of indicating GPU support as a blas
# implementation. We have to sort out what we should find for the FindBLAS and
# GPU supports since they are treated as separate components
# COSMA_BLAS selects either a host BLAS vendor or a GPU backend (CUDA/ROCM);
# it must always be set explicitly.
if(COSMA_BLAS STREQUAL "OFF")
  # NVPL added to the message: it is part of COSMA_BLAS_LIST and has a
  # bundled FindNVPL.cmake module, but was missing from the error text.
  message(FATAL_ERROR "A Blas implementation is needed when running on CPU only: choices are : auto, MKL, OPENBLAS, CRAY_LIBSCI, NVPL, CUSTOM, BLIS, ATLAS, FLEXIBLAS, ARMPL, GenericBLAS, CUDA or ROCM")
endif()
if (COSMA_BLAS MATCHES "CUDA|ROCM")
  # GPU backend requested: local gemms run on the device, no host BLAS vendor.
  set(COSMA_GPU_BACKEND ${COSMA_BLAS})
  set(COSMA_BLAS_VENDOR "OFF")
else()
  # CPU path: COSMA_BLAS names the host BLAS vendor to find.
  set(COSMA_BLAS_VENDOR ${COSMA_BLAS})
  set(COSMA_GPU_BACKEND "OFF")
endif()
if ((COSMA_WITH_NCCL OR COSMA_WITH_RCCL) AND NOT COSMA_GPU_BACKEND IN_LIST COSMA_GPU_BACKENDS_LIST)
message(FATAL_ERROR "NCCL (RCCL) can only be used with the GPU backend set to CUDA (ROCM).")
endif()
if (COSMA_WITH_GPU_AWARE_MPI AND NOT COSMA_GPU_BACKEND IN_LIST COSMA_GPU_BACKENDS_LIST)
message(FATAL_ERROR "GPU-aware MPI can only be used with the GPU backend set to CUDA or ROCM.")
endif()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
# Dependencies
# MPI
set(MPI_DETERMINE_LIBRARY_VERSION TRUE)
find_package(MPI COMPONENTS CXX REQUIRED)
adjust_mpiexec_flags()
# check if scalapack backend is valid
message(STATUS "Selected SCALAPACK backend for COSMA: ${COSMA_SCALAPACK}")
if(NOT COSMA_SCALAPACK IN_LIST COSMA_SCALAPACK_LIST)
message(FATAL_ERROR "Invalid value for COSMA_SCALAPACK!")
endif()
# the blas targets are only defined when COSMA_SCALAPACK is ON whatever value of COSMA_GPU_BACKEND
if (NOT COSMA_SCALAPACK MATCHES "OFF")
if (COSMA_SCALAPACK MATCHES "MKL" OR COSMA_SCALAPACK MATCHES "CRAY_LIBSCI" OR COSMA_SCALAPACK MATCHES "NVPL")
set(COSMA_BLAS_VENDOR ${COSMA_SCALAPACK})
else()
set(COSMA_BLAS_VENDOR "auto")
endif()
endif()
if (NOT COSMA_BLAS_VENDOR MATCHES "OFF|CUDA|ROCM")
find_package(Blas REQUIRED)
endif()
if (NOT COSMA_SCALAPACK MATCHES "OFF")
find_package(SCALAPACK REQUIRED)
endif ()
set(COSTA_WITH_PROFILING ${COSMA_WITH_PROFILING} CACHE INTERNAL "")
set(COSTA_SCALAPACK ${COSMA_SCALAPACK} CACHE INTERNAL "")
FetchContent_Declare(
costa
GIT_REPOSITORY https://github.com/eth-cscs/costa.git
GIT_TAG 2484769535772f807d402901ffca63bb6678dd42 # v2.3.0
FIND_PACKAGE_ARGS NAMES costa
)
# the joy of fetch_content. if we build costa and cosma together
# fetch_content will pick up the FindSCALAPACK from cosma NOT costa.
if (NOT TARGET costa::scalapack::scalapack AND NOT COSMA_SCALAPACK MATCHES "OFF")
add_library(costa::scalapack::scalapack ALIAS cosma::scalapack::scalapack)
endif ()
FetchContent_MakeAvailable(costa)
# these are only GPU-backends
if (COSMA_GPU_BACKEND MATCHES "CUDA|ROCM")
set(TILEDMM_GPU_BACKEND ${COSMA_GPU_BACKEND} CACHE INTERNAL "")
FetchContent_Declare(
Tiled-MM
GIT_REPOSITORY https://github.com/eth-cscs/Tiled-MM.git
GIT_TAG 0eb75179e670a04c649b50ae5e91bb71b43e4d06 # v2.3.2
FIND_PACKAGE_ARGS NAMES tiled-MM
)
FetchContent_MakeAvailable(Tiled-MM)
if (COSMA_WITH_NCCL)
find_package(CUDAToolkit REQUIRED)
find_package(NCCL REQUIRED)
elseif (COSMA_WITH_RCCL)
find_package(hip REQUIRED)
find_package(rccl REQUIRED)
endif()
if (NOT TARGET Tiled-MM::Tiled-MM)
message("Tiled-mm target not found")
endif ()
endif()
if (COSMA_WITH_PROFILING)
FetchContent_Declare(
semiprof
GIT_REPOSITORY https://github.com/bcumming/semiprof.git
GIT_TAG f132142ff2215dfa073e416fa7911d8877d62752
FIND_PACKAGE_ARGS NAMES semiprof
)
FetchContent_MakeAvailable(semiprof)
endif ()
if (COSMA_WITH_TESTS OR COSMA_WITH_APPS)
FetchContent_Declare(
cxxopts
GIT_REPOSITORY https://github.com/jarro2783/cxxopts.git
GIT_TAG 4bf61f08697b110d9e3991864650a405b3dd515d # v3.2.1
FIND_PACKAGE_ARGS NAMES cxxopts
)
FetchContent_MakeAvailable(cxxopts)
endif()
# NOTE: an unconditional duplicate of the Tiled-MM target check used to live
# here. The Tiled-MM target only exists (and is already checked) when a GPU
# backend is enabled, so the unconditional copy merely printed a spurious
# "Tiled-mm target not found" message on CPU-only builds; it has been removed.
# preserve rpaths when installing and make the install folder relocatable
# use `CMAKE_SKIP_INSTALL_RPATH` to skip this
# https://spack.readthedocs.io/en/latest/workflows.html#write-the-cmake-build
list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES
"${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" isSystemDir)
# skip RPATH if COSMA is installed to system directories
if(isSystemDir STREQUAL "-1")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
if(APPLE)
set(basePoint @loader_path)
else()
set(basePoint $ORIGIN)
endif()
file(RELATIVE_PATH relDir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}
${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
set(CMAKE_INSTALL_RPATH ${basePoint} ${basePoint}/${relDir})
endif()
# COSMA
#
include(CMakePackageConfigHelpers)
include(GNUInstallDirs)
add_subdirectory(src/cosma)
install(DIRECTORY "${cosma_SOURCE_DIR}/src/cosma"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
FILES_MATCHING
PATTERN "*.hpp")
# Generate the CMake package files (config + version) and the pkg-config file.
# The version file was previously generated twice with identical arguments;
# a single invocation suffices.
write_basic_package_version_file(
  "${cosma_BINARY_DIR}/cosmaConfigVersion.cmake"
  VERSION "${cosma_VERSION}"
  COMPATIBILITY SameMajorVersion)
configure_file("${cosma_SOURCE_DIR}/cmake/cosma.pc.in"
               "${cosma_BINARY_DIR}/cosma.pc"
               @ONLY)
configure_file("${cosma_SOURCE_DIR}/cmake/cosmaConfig.cmake.in"
               "${cosma_BINARY_DIR}/cosmaConfig.cmake"
               @ONLY)
# Install the package files together with the bundled Find modules that
# cosmaConfig.cmake may need when COSMA is consumed from the install tree.
# (cosmaConfigVersion.cmake was previously listed twice; deduplicated.)
install(FILES "${cosma_BINARY_DIR}/cosmaConfig.cmake"
              "${cosma_BINARY_DIR}/cosmaConfigVersion.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindMKL.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindNVPL.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindBlas.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindSCALAPACK.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindOPENBLAS.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindFLEXIBLAS.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindARMPL.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindATLAS.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindCRAY_LIBSCI.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindGenericBLAS.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindNCCL.cmake"
              "${cosma_SOURCE_DIR}/cmake/FindBLIS.cmake"
        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cosma")
install(FILES "${cosma_BINARY_DIR}/cosma.pc"
        DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
if(COSMA_WITH_TESTS)
add_subdirectory(libs/gtest_mpi)
enable_testing()
add_subdirectory(tests)
endif()
if(COSMA_WITH_APPS)
add_subdirectory(miniapp)
endif()
if(COSMA_WITH_BENCHMARKS AND NOT COSMA_BLAS MATCHES "OPENBLAS")
add_subdirectory(benchmarks)
endif()
================================================
FILE: INSTALL.md
================================================
## Building COSMA
To build COSMA, do the following steps:
```bash
# clone the repository
git clone --recursive https://github.com/eth-cscs/COSMA.git
cd COSMA
# create a build directory within COSMA
mkdir build
cd build
# set up the compiler, e.g. with:
export CC=`which cc`
export CXX=`which CC`
# Choose which BLAS and SCALAPACK backends to use (e.g. MKL)
cmake -DCOSMA_BLAS=MKL -DCOSMA_SCALAPACK=MKL ..
# compile
make -j 8
```
> !! Note the *--recursive* flag !!
Other important options that can be passed to `cmake` are the following:
- `COSMA_BLAS` (required): `MKL`, `OPENBLAS`, `CRAY_LIBSCI`, `CUSTOM`, `CUDA` or `ROCM`. Determines which backend will be used for the local matrix multiplication calls.
- `COSMA_SCALAPACK:` OFF (default), `MKL`, `CRAY_LIBSCI`, `CUSTOM`. If specified, `COSMA` will also provide ScaLAPACK wrappers, thus offering `pdgemm`, `psgemm`, `pzgemm` and `pcgemm` functions, which completely match the ScaLAPACK API.
## Building COSMA on Multi-GPU Systems
COSMA can take advantage of fast GPU-to-GPU interconnects like NV-Links, through the use of the following:
- NCCL library (for NVIDIA GPUs), i.e. RCCL library (for AMD GPUs): when `-DCOSMA_WITH_NCCL=ON`, i.e. `-DCOSMA_WITH_RCCL=ON` is specified in `cmake`, all the collective communication is performed through these libraries, which can utilize fast gpu-to-gpu interconnects.
- GPU-aware MPI: when `-DCOSMA_WITH_GPU_AWARE_MPI=ON` is specified in `cmake`, cuda-aware MPI for NVIDIA GPUs (i.e. rocm-aware MPI for AMD GPUs) will be used for collective communication. The user must make sure that the gpu-aware MPI is enabled. For example, on Cray-systems, this can be done by setting the following environment variables:
- `export MPICH_RDMA_ENABLED_CUDA=1`
- `export MPICH_GPU_SUPPORT_ENABLED=1`
## Building COSMA on Cray Systems
There are already prepared scripts for loading the necessary dependencies for COSMA on Cray-Systems:
- `Cray XC40` (CPU-only version): `source ./scripts/piz_daint_cpu.sh` loads `MKL` and other necessary modules.
- `Cray XC50` (Hybrid version): `source ./scripts/piz_daint_gpu.sh` loads `cublas` and other necessary modules.
After the right modules are loaded, the instructions from the beginning of this file can be followed.
## Installing COSMA
To install do `make install`.
> !! Note: To set custom installation directory use `CMAKE_INSTALL_PREFIX` when building.
COSMA is CMake friendly and provides a cosmaConfig.cmake module for easy
integration into 3rd-party CMake projects with
```
find_package(cosma REQUIRED)
target_link_libraries( ... cosma::cosma)
```
COSMA's dependencies are taken care of internally, nothing else needs to be
linked. Make sure to set `CMAKE_INSTALL_PREFIX` to COSMA's installation directory
when building.
There is rudimentary pkg-config support; dependencies are handled explicitly by
consumers.
# Installing COSMA with Spack
- with OpenBLAS back end: `spack install cosma`
- with MKL back end: `spack install cosma ^mkl`
- with GPU back end: `spack install cosma +cuda`
- with Netlib LAPACK: `spack install cosma ^netlib-lapack`
- with MKL ScaLAPACK: `spack install cosma +scalapack ^mkl`
Notes:
- By default Spack builds in release mode with debug information included (-O2
-g). To build with -O3, add `build_type=Release` to the command line.
- By default Spack selects openmpi as the MPI implementation, to select MPICH,
add `^mpich`
For more information on Spack: [Spack 101 Tutorial](https://spack.readthedocs.io/en/latest/tutorial.html).
## Docker
COSMA can be installed into a Docker container in the following way:
```
docker build -f docker/gpu/build-env.Dockerfile -t cosma-build-env .
docker build --build-arg BUILD_ENV=cosma-build-env -f docker/gpu/deploy.Dockerfile -t cosma .
```
Then the `cosma` container can be deployed for testing:
```
docker run --rm -it -v (pwd):(pwd) --gpus all cosma
```
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2018, ETH Zürich.
Copyright (c) 2021, Advanced Micro Devices, Inc.
Copyright (c) 2018-2022, ETH Zürich.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
[](https://gitlab.com/cscs-ci/eth-cscs/COSMA/-/commits/master)

## Table of Contents
- [Overview](#overview)
- [COSMA Literature](#cosma-literature)
- [Features](#features)
- [Building COSMA](#building-cosma)
- [COSMA Dependencies](#cosma-dependencies)
- [Using COSMA](#using-cosma)
- [30 seconds Tutorial](#using-cosma-in-30-seconds)
- [COSMA on Multi-GPU Systems](#cosma-on-multi-gpu-systems)
- [Using NCCL/RCCL Libraries](#using-ncclrccl-libraries)
- [Using GPU-aware MPI](#using-gpu-aware-mpi)
- [COSMA in production](#cosma-in-production)
- [CP2K](#cp2k)
- [Julia language](#julia-language)
- [Examples - Miniapps](#miniapps)
- [Matrix Multiplication with COSMA](#matrix-multiplication)
- [COSMA pxgemm wrapper](#cosma-pxgemm-wrapper)
- [Tunable Parameters](#tunable-parameters)
- [Parameters Overview](#parameters-overview)
- [Controlling GPU memory](#controlling-gpu-memory)
- [Controlling CPU memory](#controlling-cpu-memory)
- [Performance Profiling](#profiling)
- [Authors](#authors)
- [Questions?](#questions)
- [Acknowledgements](#acknowledgements)
## Overview
COSMA is a parallel, high-performance, GPU-accelerated, matrix-matrix multiplication algorithm that is communication-optimal for all combinations of matrix dimensions, number of processors and memory sizes, without the need for any parameter tuning. The key idea behind COSMA is to first derive a tight optimal sequential schedule and only then parallelize it, preserving I/O optimality between processes. This stands in contrast with the 2D and 3D algorithms, which fix process domain decomposition upfront and then map it to the matrix dimensions, which may result in asymptotically more communication. The final design of COSMA facilitates the overlap of computation and communication, ensuring speedups and applicability of modern mechanisms such as RDMA. COSMA allows to not utilize some processors in order to optimize the processor grid, which reduces the communication volume even further and increases the computation volume per processor.
COSMA got the **Best Student Paper Award** at the prestigious **Supercomputing 2019** conference in Denver, US.
COSMA alleviates the issues of current state-of-the-art algorithms, which can be summarized as follows:
- `2D (SUMMA)`: Requires manual tuning and not communication-optimal in the presence of extra memory.
- `2.5D`: Optimal for `m=n`, but inefficient for `m << n` or `n << m` and for some numbers of processes `p`.
- `Recursive (CARMA)`: Asymptotically communication-optimal for all `m, n, k, p`, but splitting always the largest dimension might lead up to `√3` increase in communication volume.
- `COSMA (this work)`: Strictly communication-optimal (not just asymptotically) for all `m, n, k, p` and memory sizes that yields the speedups by factor of up to 8.3x over the second-fastest algorithm.
In addition to being communication-optimal, this implementation is highly optimized to reduce the memory footprint in the following sense:
- `Buffer Reuse`: all the buffers are pre-allocated and carefully reused during execution, including the buffers necessary for the communication, which reduces the total memory usage.
- `Reduced Local Data Movement`: the assignment of data blocks to processes is fully adapted to communication pattern, which minimizes the need of local data reshuffling that arise after each communication step.
The library supports both one-sided and two-sided MPI communication backends. It uses `dgemm` for the local computations, but also has a support for the `GPU` acceleration through our `Tiled-MM` library using `cublas` or `rocBLAS`.
## COSMA Literature
The paper and other materials on COSMA are available under the following link:
- **ACM Digital Library (Best Student Paper Award at SC19):** https://dl.acm.org/doi/10.1145/3295500.3356181
- **Arxiv:** https://arxiv.org/abs/1908.09606
- **YouTube Presentation:** https://www.youtube.com/watch?v=5wiZWw5ltR0
- **Press Release:** https://www.cscs.ch/science/computer-science-hpc/2019/new-matrix-multiplication-algorithm-pushes-the-performance-to-the-limits/
## Features
- **[NEW] Multi-GPU Systems Support:** COSMA is now able to take advantage of fast GPU-to-GPU interconnects either through the use of NCCL/RCCL libraries or by using the GPU-aware MPI. Both, NVIDIA and AMD GPUs are supported.
- **ScaLAPACK API Support:** it is enough to link to COSMA, without changing the code and all `p?gemm` calls will use ScaLAPACK wrappers provided by COSMA.
- **C/Fortran Interface:** written in `C++`, but provides `C` and `Fortran` interfaces.
- **Custom Types:** fully templatized types.
- **GPU acceleration:** supports both **NVIDIA** and **AMD** GPUs.
- **Supported BLAS (CPU) backends:** MKL, LibSci, NETLIB, BLIS, ATLAS.
- **Custom Data Layout Support:** natively uses its own blocked data layout of matrices, but supports arbitrary grid-like data layout of matrices.
- **Tranposition/Conjugation Support:** matrices `A` and `B` can be transposed and/or conjugated.
- **Communication and Computation Overlap:** supports overlapping of communication and computation.
- **Spack Installation:** can be built and installed with `Spack` since v14.1
- **Julia Package:** see https://github.com/haampie/COSMA.jl/ on how to use COSMA in the Julia language.
## Building COSMA
See [Installation Instructions](INSTALL.md).
## COSMA Dependencies
COSMA is a CMake project and requires a recent CMake (>= 3.24).
External dependencies:
- `MPI 3`: (required)
- `BLAS`: when the problem becomes local, COSMA uses provided `?gemm` backend, which can be one of the following:
- `MKL` (default)
- `OPENBLAS`
- `BLIS`
- `ATLAS`
- `CRAY_LIBSCI`: `Cray-libsci` or `Cray-libsci_acc` (GPU-accelerated)
- `CUDA`: `cublas` is used for NVIDIA GPUs
- `ROCM`: `rocBLAS` is used for AMD GPUs
- `CUSTOM`: user-provided BLAS API
Some dependencies are bundled as submodules and need not be installed explicitly:
- `TiledMM` - cublasXt GEMM replacement, that is also ported to AMD GPUs.
- `COSTA` - distributed matrix reshuffle and transpose algorithm.
- `semiprof` - profiling utility
- `gtest_mpi` - MPI utility wrapper over GoogleTest (unit testing library)
## Using COSMA
To allow easy integration, COSMA can be used in the following ways:
- **without changing your code:** if your code already uses the `ScaLAPACK API`, then you can just link to COSMA, before linking to any other library providing `pxgemm` and all `pxgemm` calls will be using COSMA, without the need to change your code at all. To get a feeling of the performance you can expect to get, please have a look at the [pdgemm miniapp](#cosma-pdgemm-wrapper). To see how you can link your code to COSMA `pxgemm`, have a look at the [30 seconds tutorial](#using-cosma-in-30-seconds) on how to do this. In this way, we integrated COSMA into CP2K quantum chemistry simulator, which you can read more about in the [production example](#cosma-in-production).
- **adapting your code:** if your code is not using ScaLAPACK, then there are two interfaces that can be used:
- **custom layout:** if your matrices are distributed in a custom way, then it is enough to pass the descriptors of your data layout to `multiply_using_layout` function, which will then adapt COSMA to your own layout.
- **native COSMA layout:** to get the maximum performance, the native COSMA matrix layout should be used. To get an idea of the performance you can expect to get, please have a look at the [matrix multiplication miniapp](#matrix-multiplication).
The documentation for the latter option will soon be published here.
## Using COSMA in 30 seconds
For easy integration, it is enough to build COSMA with ScaLAPACK API and then link your code to COSMA before linking to any other library providing ScaLAPACK `pxgemm`. This way, all `pxgemm` calls will be using COSMA `pxgemm` wrappers. To achieve this, please follow these steps:
1) Build COSMA with ScaLAPACK API:
```bash
###############
# get COSMA
###############
git clone --recursive https://github.com/eth-cscs/COSMA cosma && cd cosma
##############################
# build and install COSMA
##############################
mkdir build && cd build
# set up the compiler, e.g. with:
export CC=`which cc`
export CXX=`which CC`
# choose BLAS and SCALAPACK versions you want to use
# COSMA_BLAS can be: MKL, OpenBLAS, CRAY_LIBSCI, CUDA, ROCM, CUSTOM
# COSMA_SCALAPACK can be MKL, CRAY_LIBSCI, CUSTOM
cmake -DCOSMA_BLAS=CUDA -DCOSMA_SCALAPACK=MKL -DCMAKE_INSTALL_PREFIX=/cosma ..
make -j 8
make install
```
> !! Note the *--recursive* flag !!
2) Link your code to COSMA:
- **CPU-only** version of COSMA:
- link your code to:
> -L/cosma/lib64 -lcosma_pxgemm -lcosma -lcosta_scalapack
- then link to the BLAS and ScaLAPACK you built COSMA with (see `COSMA_BLAS` and `COSMA_SCALAPACK` flags in cmake):
> -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lmkl_blacs_intelmpi_lp64 -lgomp -lpthread -lm
- using **GPU-accelerated** version of COSMA:
- link your code to:
>-L/cosma/lib64 -lcosma_pxgemm -lcosma -lcosta_scalapack -lTiled-MM
- link to the GPU backend you built COSMA with (see `COSMA_BLAS` flag in cmake):
>-lcublas -lcudart -lrt
- then link to the ScaLAPACK you built COSMA with (see `COSMA_SCALAPACK` flag in cmake):
>-L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lmkl_blacs_intelmpi_lp64 -lgomp -lpthread -lm
3) Include headers:
>-I/cosma/include
## COSMA on Multi-GPU Systems
COSMA is able to take advantage of fast GPU-to-GPU interconnects on multi-gpu systems. This can be achieved in one of the following ways.
### Using `NCCL/RCCL` Libraries
When running `cmake` for COSMA, make sure to specify `-DCOSMA_WITH_NCCL=ON`, e.g. by doing:
```bash
# NVIDIA GPUs
# this will look for the NCCL library in the following environment variables:
# - NCCL_ROOT: Base directory where all NCCL components are found
# - NCCL_INCLUDE_DIR: Directory where NCCL header is found
# - NCCL_LIB_DIR: Directory where NCCL library is found
cmake -DCOSMA_BLAS=CUDA -DCOSMA_SCALAPACK=MKL -DCOSMA_WITH_NCCL=ON ..
# AMD GPUs
# this will look for the RCCL library in the following environment variables:
# - RCCL_ROOT_DIR: Base directory where all RCCL components are found
# - RCCL_INCLUDE_DIR: Directory where RCCL header is found
# - RCCL_LIB_DIR: Directory where RCCL library is found
cmake -DCOSMA_BLAS=ROCM -DCOSMA_SCALAPACK=MKL -DCOSMA_WITH_NCCL=ON ..
```
### Using GPU-aware MPI
When running `cmake` for COSMA, make sure that GPU-aware MPI is enabled in your environment and specify `-DCOSMA_WITH_GPU_AWARE_MPI=ON` when running cmake for COSMA, e.g. by doing:
```bash
# Before running cmake, make sure that GPU-aware MPI is enabled on your system.
# For example, on Cray-systems, this can be done by setting the following environment variables:
# - export MPICH_RDMA_ENABLED_CUDA=1
# - export MPICH_GPU_SUPPORT_ENABLED=1
cmake -DCOSMA_BLAS=CUDA -DCOSMA_SCALAPACK=MKL -DCOSMA_WITH_GPU_AWARE_MPI=ON ..
```
## COSMA in Production
### CP2K
COSMA is integrated into the [CP2K](https://www.cp2k.org) quantum chemistry simulator. Since COSMA provides the ScaLAPACK API, it is enough to link CP2K to COSMA, without changing CP2K code at all, which makes the integration trivial even if (as in the case of CP2K) the simulation code is written in Fortran.
In the production run, we ran *Random-Phase Approximation (RPA)* benchmark of 128 water molecules, using the *Resolution of Identity (RI)*. The benchmark was run once on 1024 and once on 128 nodes of the GPU partition on [Piz Daint supercomputer](https://www.cscs.ch/computers/piz-daint/) (Cray XC50). Computationally, the most dominant part of this benchmark consists of 46 **tall-and-skinny** dense matrix multiplications, with the parameters shown in the table below:

On **1024 nodes**, we compared the performance of CP2K using `COSMA` and `Cray-libsci_acc` (version: 19.10.1), both being GPU accelerated, for all dense matrix-matrix multiplications (`pdgemm` routine). As can be seen in the following table, the version with COSMA was approximately **2x faster**.

On **128 nodes**, we compared the performance of CP2K using the following algorithms for multiplying matrices (`pdgemm` routine): `MKL` (version: 19.0.1.144), `Cray-libsci` (version: 19.06.1), `Cray-libsci_acc` (version: 19.10.1, GPU accelerated) and `COSMA` (both CPU-only and GPU-accelerated versions) libraries. The version with COSMA was the fastest on both CPU and GPU. The CPU version of COSMA achieved the peak performance, whereas the GPU version achieved more than 65\% of the peak performance of GPUs. Keep in mind that the peak performance of GPUs assumes the data is already residing on GPUs which is not the case here, since matrices were initially residing on CPU. This is one of the reasons why the peak performance is not achieved with the GPU version. Still, the GPU version of COSMA was **25-27\%** faster than the second best in this case. The results are summarized in the following table:

With COSMA, even higher speedups are possible, depending on matrix shapes. To illustrate possible performance gains, we also ran different **square matrix** multiplications on the same number of nodes (**=128**) of [Piz Daint supercomputer](https://www.cscs.ch/computers/piz-daint/). The block size is `128x128` and the processor grid is also square: `16x16` (2 ranks per node). The performance of COSMA is compared against Intel MKL ScaLAPACK (version: 19.0.1.144). The results on Cray XC50 (GPU-accelerated) and Cray XC40 (CPU-only) are summarized in the following table:

All the results from this section assumed matrices given in (block-cyclic) ScaLAPACK data layout. However, if the native COSMA layout is used, even higher throughput is possible.
### Julia language
The [COSMA.jl](https://github.com/haampie/COSMA.jl/) Julia package uses COSMA's C-interface to provide COSMA-based matrix-matrix multiplication for the [DistributedArrays.jl](https://github.com/JuliaParallel/DistributedArrays.jl/) package. A minimal working example to multiply two random matrices looks as follows:
```julia
using MPIClusterManager, DistributedArrays, Distributed
manager = MPIManager(np = 6)
addprocs(manager)
COSMA.use_manager(manager)
@everywhere using COSMA
A = drand(8000, 8000) * drand(8000, 8000)
```
## Miniapps
```bash
# for CPU-only version
sbatch schedule_miniapp_on_daint_cpu.sh
# for Hybrid (CPU+GPU) version
sbatch schedule_miniapp_on_daint_gpu.sh
```
The script will use SLURM to submit a job on 10 nodes. The job will run 2 matrix
multiplications and output the time COSMA algorithm took.
### Matrix Multiplication
The project contains a miniapp that produces two random matrices `A` and `B`,
computes their product `C` with the COSMA algorithm and outputs the time of the
multiplication.
The miniapp consists of an executable `./build/miniapp/cosma_miniapp` which can
be run with the following command line (assuming we are in the root folder of
the project):
```bash
# set the number of threads to be used by each MPI rank
export OMP_NUM_THREADS=18
# if using CPU version with MKL backend, set MKL_NUM_THREADS as well
export MKL_NUM_THREADS=18
# run the miniapp
mpirun -np 4 ./build/miniapp/cosma_miniapp -m 1000 -n 1000 -k 1000 -r 2
```
The overview of all supported options is given below:
- `-m (--m_dim)` (default: `1000`): number of rows of matrices `A` and `C`.
- `-n (--n_dim)` (default: `1000`): number of columns of matrices `B` and `C`.
- `-k (--k_dim)` (default: `1000`): number of columns of matrix `A` and rows of matrix `B`.
- `-s (--steps)` (optional): string of triplets divided by comma defining the
splitting strategy. Each triplet defines one step of the algorithm. The first
character in the triplet defines whether it is a parallel (p) or a sequential
(s) step. The second character defines the dimension that is split in this
step. The third parameter is an integer which defines the divisor. This
parameter can be omitted. In that case the default strategy will be used. An example of a possible value for the upper example: `--steps=sm2,pn2,pk2`.
- `-r (--n_rep)` (optional, default: `2`): the number of repetitions.
- `-t (--type)` (optional, default: `double`): data type of matrix entries. Can be one of: `float`, `double`, `zfloat` and `zdouble`. The last two correspond to complex numbers.
- `--test` (optional): if present, the result of COSMA will be verified with the result of the available SCALAPACK.
- `-h (--help) (optional)`: print available options.
### COSMA pxgemm wrapper
COSMA also contains a wrapper for ScaLAPACK `pxgemm` calls which offers scalapack interface (pxgemm functions with exactly the same signatures as ScaLAPACK). Running these functions will take care of transforming the matrices between ScaLAPACK and COSMA data layout, perform the multiplication using COSMA algorithm and transform the result back to the specified ScaLAPACK data layout.
The miniapp consists of an executable `./build/miniapp/pxgemm_miniapp` which can be run as follows (assuming we are in the root folder of the project):
```bash
# set the number of threads to be used by each MPI rank
export OMP_NUM_THREADS=18
# if using CPU version with MKL backend, set MKL_NUM_THREADS as well
export MKL_NUM_THREADS=18
# run the miniapp
mpirun -np 4 ./build/miniapp/pxgemm_miniapp -m 1000 -n 1000 -k 1000 \
--block_a=128,128 \
--block_b=128,128 \
--block_c=128,128 \
--p_grid=2,2 \
--transpose=NN \
--type=double \
--algorithm=cosma
```
The overview of all supported options is given below:
- `-m (--m_dim)` (default: `1000`): number of rows of matrices `A` and `C`.
- `-n (--n_dim)` (default: `1000`): number of columns of matrices `B` and `C`.
- `-k (--k_dim)` (default: `1000`): number of columns of matrix `A` and rows of matrix `B`.
- `--block_a` (optional, default: `128,128`): 2D-block size for matrix A.
- `--block_b` (optional, default `128,128`): 2D-block size for matrix B.
- `--block_c` (optional, default `128,128`): 2D-block size for matrix C.
- `-p (--p_grid)` (optional, default: `1,P`): 2D-processor grid. By default `1xP` where `P` is the total number of MPI ranks.
- `--transpose` (optional, default: `NN`): transpose/conjugate flags to A and B.
- `--alpha` (optional, default: 1): alpha parameter in `C = alpha*A*B + beta*C`.
- `--beta` (optional, default: 0): beta parameter in `C = alpha*A*B + beta*C`.
- `-r (--n_rep)` (optional, default: 2): number of repetitions.
- `-t (--type)` (optional, default: `double`): data type of matrix entries. Can be one of: `float`, `double`, `zfloat` and `zdouble`. The last two correspond to complex numbers.
- `--test` (optional): if present, the result of COSMA will be verified with the result of the available SCALAPACK.
- `--algorithm` (optional, default: `both`): defines which algorithm (`cosma`, `scalapack` or `both`) to run.
- `-h (--help) (optional)`: print available options.
## Tunable Parameters
### Parameters Overview
The overview of tunable parameters, that can be set through environment variables is given in the table below. The default values are given in **bold**.
ENVIRONMENT VARIABLE | POSSIBLE VALUES | DESCRIPTION
| :------------------- | :------------------- |:------------------- |
`COSMA_OVERLAP_COMM_AND_COMP` | ON, **OFF** | If enabled, communication and computation might be overlapped, depending on the built-in heuristics.
`COSMA_ADAPT_STRATEGY` | **ON**, OFF | If enabled, COSMA will try to natively use the scalapack layout, without transforming to the COSMA layout. Used only in the pxgemm wrapper.
`COSMA_CPU_MAX_MEMORY` | integer (`size_t`), by default: **infinite** | CPU memory limit in megabytes per MPI process (rank). Allowing too little memory might reduce the performance.
`COSMA_GPU_MEMORY_PINNING` | **ON**, OFF | If enabled, COSMA will pin parts of the host memory to speed up CPU-GPU memory transfers. Used only in the GPU backend.
`COSMA_GPU_MAX_TILE_M`, `COSMA_GPU_MAX_TILE_N`, `COSMA_GPU_MAX_TILE_K` | integer (`size_t`), by default: **5000** | Tile sizes for each dimension, that are used to pipeline the local CPU matrices to GPU. `K` refers to the shared dimension and `MxN` refer to the dimensions of matrix `C`
`COSMA_GPU_STREAMS` | integer (`size_t`), by default: **2** | The number of GPU streams that each rank should use.
`COSMA_MEMORY_POOL_AMORTIZATION` | real (`double`), by default **1.2** | The growth factor for the memory pool. If equal to 1.2, then 1.2x the requested size is allocated (thus, 20% more than needed). Higher values better amortize the cost of the memory pool resizing which can occur when the algorithm is invoked for different matrix sizes. However, higher amortization values also mean that potentially more memory is allocated than used which can be a problem when the memory resource is tight.
`COSMA_MIN_LOCAL_DIMENSION` | integer (`size_t`), by default: **200** | If any matrix dimension becomes smaller than this threshold (after splitting the matrices among the available MPI ranks), then the actual number of ranks is reduced so that all matrix dimensions stay at or above this limit.
`COSMA_DIM_THRESHOLD` | integer (`size_t`), by default: **0** | In SCALAPACK wrappers, if any matrix dimension is less than this threshold, the problem is considered too small and is dispatched to SCALAPACK for computation. This only affects the SCALAPACK wrappers.
`COSMA_CPU_MEMORY_ALIGNMENT` | integer (`size_t`), by default: **0** | The number of bytes to which all cpu (host) buffers will be aligned.
These are all optional parameters. They are used in runtime and hence changing any of those does not require the code to be recompiled.
We further discuss in details how to set the limits for both CPU and GPU memory that COSMA is allowed to use.
### Controlling GPU memory
Controlling how much GPU memory COSMA is allowed to use can be done by specifying the tile dimensions as:
```bash
export COSMA_GPU_MAX_TILE_M=5000
export COSMA_GPU_MAX_TILE_N=5000
export COSMA_GPU_MAX_TILE_K=5000
```
where `K` refers to the shared dimension and `MxN` refer to the dimensions of matrix `C`. By default, all tiles are square and have dimensions `5000x5000`.
These are only the maximum tiles and the actual tile sizes that will be used might be less, depending on the problem size. These variables are only used in the GPU backend for pipelining the local matrices to GPUs.
It is also possible to specify the number of GPU streams:
```bash
export COSMA_GPU_STREAMS=2
```
The values given here are the default values.
The algorithm will then require device memory for at most this many elements:
```cpp
num_streams * (tile_m * tile_k + tile_k * tile_n + tile_m * tile_n)
```
Therefore, by changing the values of these variables, it is possible to control the usage of GPU memory.
### Controlling CPU memory
In case the available CPU memory is a scarce resource, it is possible to set the CPU memory limit to COSMA, by exporting the following environment variable:
```bash
export COSMA_CPU_MAX_MEMORY=1024 # in megabytes per MPI process (rank)
```
which will set the upper limit [in MB] on the memory that each MPI process (rank) is allowed to use. This might, however, reduce the performance.
In case the algorithm is not able to perform the multiplication within the given memory range, a `runtime_error` will be thrown.
> This parameter is still in the testing phase!
## Profiling
Use `-DCOSMA_WITH_PROFILING=ON` to instrument the code. We use the profiler, called `semiprof`, written by Benjamin Cumming (https://github.com/bcumming).
Running the miniapp locally (from the project root folder) with the following command:
```bash
mpirun --oversubscribe -np 4 ./build/miniapp/cosma_miniapp -m 1000 -n 1000 -k 1000 -P 4
```
Produces the following output from rank 0:
```
Matrix dimensions (m, n, k) = (1000, 1000, 1000)
Number of processors: 4
_p_ REGION CALLS THREAD WALL %
_p_ total - 0.110 0.110 100.0
_p_ multiply - 0.098 0.098 88.7
_p_ computation 2 0.052 0.052 47.1
_p_ communication - 0.046 0.046 41.6
_p_ copy 3 0.037 0.037 33.2
_p_ reduce 3 0.009 0.009 8.3
_p_ layout 18 0.000 0.000 0.0
_p_ preprocessing 3 0.012 0.012 11.3
```
The percentage is always relative to the first level above. All time measurements are in seconds.
## Authors
- Grzegorz Kwasniewski, Marko Kabic, Maciej Besta, Joost VandeVondele, Raffaele Solca, Torsten Hoefler
Cite as:
```
@inproceedings{cosma_algorithm_2019,
title={Red-blue pebbling revisited: Near optimal parallel matrix-matrix multiplication},
author={Kwasniewski, Grzegorz and Kabi{\'c}, Marko and Besta, Maciej and VandeVondele, Joost and Solc{\`a}, Raffaele and Hoefler, Torsten},
booktitle={Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
pages={1--22},
year={2019}
}
```
## Questions?
For questions, feel free to contact us, and we will soon get back to you:
- For questions regarding the implementation, contact Marko Kabic (marko.kabic@inf.ethz.ch), Teodor Nikolov (tnikolov@cscs.ch) or Simon Pintarelli (simon.pintarelli@cscs.ch).
- For questions regarding the theory, contact Grzegorz Kwasniewski (gkwasnie@inf.ethz.ch).
> If you need any help with the integration of COSMA into your library, we will be more than happy to help you!
## Acknowledgements
This work was funded in part by:
| [**ETH Zurich**](https://ethz.ch/en.html)**: Swiss Federal Institute of Technology in Zurich**
| :------------------- | :------------------- |
| [**CSCS**](https://www.cscs.ch)**: Swiss National Supercomputing Centre**
| [**PASC**](https://www.pasc-ch.org/)**: Platform for Advanced Scientific Computing**
| [**ERC**](https://erc.europa.eu): **European Research Council** (Horizon2020, grant agreement DAPP, No.678880)
| [**MaX**](http://www.max-centre.eu): **Materials design at the Exascale** (Horizon2020, grant agreement MaX CoE, No. 824143.)
We thank Thibault Notargiacomo, Sam Yates, Benjamin Cumming and Simon Pintarelli for their generous contribution to the project: great ideas, useful advices and fruitful discussions.
================================================
FILE: _config.yml
================================================
theme: jekyll-theme-slate
================================================
FILE: benchmarks/CMakeLists.txt
================================================
# Provides find_cuda_version(), used below to gate the cublasLt benchmark.
include(find_cuda_version)
################
# Build test #
################
# Benchmarks that depend only on the core cosma target.
set(executables "ubench-allgather"
"allgather-volume"
"sendrecv"
"reduce-scatter"
"blocking_vs_non_blocking"
"dgemm_perf_model")
# if (${COSMA_BLAS} STREQUAL "MKL")
# list(APPEND executables "transpose")
# endif()
# One executable per benchmark source file, each linked against cosma.
foreach(exec ${executables})
add_executable(${exec} "${exec}.cpp")
target_link_libraries(${exec} cosma)
endforeach()
# The cuBLAS benchmark is only built for the CUDA backend.
if (COSMA_GPU_BACKEND MATCHES "CUDA")
find_cuda_version()
# check if cuda toolkit version >= 10.1
# which is needed for cublasLt (used in the benchmark)
if (CUDA_TOOLKIT_MAJOR_VERSION GREATER 10 OR
(CUDA_TOOLKIT_MAJOR_VERSION EQUAL 10 AND CUDA_TOOLKIT_MINOR_VERSION GREATER_EQUAL 1))
add_executable(gpu_gemm_cublas "gpu_gemm_cublas.cpp")
target_link_libraries(gpu_gemm_cublas cosma Tiled-MM::Tiled-MM cublasLt cublas)
target_compile_definitions(gpu_gemm_cublas PRIVATE COSMA_HAVE_GPU)
endif()
endif()
================================================
FILE: benchmarks/allgather-volume.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace cosma;
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int P, rank;
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int base_size = 1 << 25;
int local_size = base_size;
int total_size = P * base_size;
std::vector in(local_size);
std::vector result(total_size);
const int n_rep = 10;
{
Timer time(n_rep, "MPI_Allgather");
for (int i = 0; i < n_rep; ++i) {
MPI_Allgather(in.data(),
local_size,
MPI_DOUBLE,
result.data(),
local_size,
MPI_DOUBLE,
MPI_COMM_WORLD);
}
}
MPI_Finalize();
return 0;
}
================================================
FILE: benchmarks/bcast-volume.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace cosma;
int main( int argc, char **argv ) {
MPI_Init(&argc, &argv);
int P, rank;
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int base_size = 1 << 25;
int local_size = base_size;
int total_size = P * base_size;
std::vector in(local_size);
std::vector result(total_size);
const int n_rep = 10;
{
Timer time(n_rep, "MPI_Allgather");
for (int i = 0; i < n_rep; ++i) {
MPI_Allgather(in.data(), local_size, MPI_DOUBLE, result.data(),
local_size, MPI_DOUBLE, MPI_COMM_WORLD);
}
}
MPI_Finalize();
return 0;
}
================================================
FILE: benchmarks/blocking_vs_non_blocking.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
// Scope-based timer for MPI benchmarks: the constructor barriers all ranks
// and records the start time; the destructor reduces the elapsed time over
// the communicator and prints min/max/avg time per repetition on rank 0.
// Fix: restore the template arguments (time_point clock, duration rep,
// duration_cast target) that were stripped from the checked-in text.
class Timer {
  public:
    using time_point =
        std::chrono::time_point<std::chrono::high_resolution_clock>;
    int n_rep_;         // repetitions performed inside the timed region
    std::string region; // label printed with the results
    MPI_Comm comm_;
    time_point start;

    Timer(int n_rep, std::string reg = "", MPI_Comm comm = MPI_COMM_WORLD)
        : n_rep_(n_rep)
        , region(reg)
        , comm_(comm) {
        // Synchronize so every rank starts the clock together.
        MPI_Barrier(comm);
        start = std::chrono::high_resolution_clock::now();
    }

    ~Timer() {
        auto finish = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> elapsed = finish - start;
        // milliseconds::rep is long long, matching MPI_LONG_LONG below
        auto time =
            std::chrono::duration_cast<std::chrono::milliseconds>(elapsed)
                .count();
        long long max_time, min_time, sum_time;
        MPI_Reduce(&time, &max_time, 1, MPI_LONG_LONG, MPI_MAX, 0, comm_);
        MPI_Reduce(&time, &min_time, 1, MPI_LONG_LONG, MPI_MIN, 0, comm_);
        MPI_Reduce(&time, &sum_time, 1, MPI_LONG_LONG, MPI_SUM, 0, comm_);
        int rank, size;
        MPI_Comm_rank(comm_, &rank);
        MPI_Comm_size(comm_, &size);
        if (rank == 0) {
            std::cout << region << " MIN TIME [ms]: " << 1.0 * min_time / n_rep_
                      << std::endl;
            std::cout << region << " MAX TIME [ms]: " << 1.0 * max_time / n_rep_
                      << std::endl;
            std::cout << region
                      << " AVG TIME [ms]: " << 1.0 * sum_time / (n_rep_ * size)
                      << std::endl;
            std::cout << "\n";
        }
    }
};
// Splits P ranks into `divisor` consecutive groups of P / divisor ranks each.
// Returns {group index of `rank`, offset of `rank` within its group}.
// Fix: restore the stripped std::pair<int, int> template arguments.
std::pair<int, int> group_and_offset(int P, int divisor, int rank) {
    int subset_size = P / divisor;
    int subint_index = rank / subset_size;
    int offset = rank - subint_index * subset_size;
    return {subint_index, offset};
}
// Computes C = 1.0 * A * B + 0.0 * C for an m x k matrix A, k x n matrix B
// and m x n matrix C using COSMA's single-rank local multiply.
void solve(double *A, double *B, double *C, int m, int n, int k) {
// callers pass varying (possibly non-square) m, n, k subproblem sizes
auto ctx = cosma::make_context();
bool copy_c_back = true;
cosma::local_multiply(ctx, A, B, C, m, n, k, 1.0, 0.0, copy_c_back);
}
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int P, rank;
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int divisor = 2;
int m = 5000;
int k = 2000;
int n = 2000;
int n_iter = 3;
size_t local_size = k * n / divisor;
float waiting_time = 0.7f;
std::vector local_buffer(local_size);
std::vector global_buffer(local_size * divisor);
std::vector a(m / divisor * k);
std::vector b(k * n);
std::vector c(m / divisor * n);
// initialize dgemm
for (int i = 0; i < 5; ++i) {
solve(a.data(), b.data(), c.data(), m / divisor, n / divisor, k);
}
{
Timer dgemm_small(10, "dgemm subproblem");
for (int i = 0; i < 10; ++i) {
solve(a.data(), b.data(), c.data(), m / divisor, n / divisor, k);
}
}
{
Timer dgemm_large(10, "dgemm large problem");
for (int i = 0; i < 10; ++i) {
solve(a.data(), b.data(), c.data(), m / divisor, n, k);
}
}
int gp, off;
std::tie(gp, off) = group_and_offset(P, divisor, rank);
MPI_Comm subcom;
MPI_Comm_split(MPI_COMM_WORLD, off, gp, &subcom);
MPI_Request req[2 * (divisor - 1)];
int reqi = 0;
for (int i = 0; i < divisor; ++i) {
if (i != gp) {
int offset = i * local_size;
MPI_Recv_init(global_buffer.data() + offset,
local_size,
MPI_DOUBLE,
i,
0,
subcom,
&req[reqi]);
MPI_Send_init(local_buffer.data(),
local_size,
MPI_DOUBLE,
i,
0,
subcom,
&req[divisor - 1 + reqi]);
reqi++;
}
}
{
Timer timer_async(1, "asynchronous");
MPI_Startall(2 * (divisor - 1), req);
// do the work
solve(a.data(), b.data(), c.data(), m / divisor, n / divisor, k);
// usleep(waiting_time * 1e6);
for (int i = 0; i < divisor - 1; ++i) {
int idx = -1;
MPI_Waitany(divisor - 1, req, &idx, MPI_STATUS_IGNORE);
// if (idx >= rank) idx++;
solve(a.data(), b.data(), c.data(), m / divisor, n / divisor, k);
// usleep(waiting_time * 1e6);
}
MPI_Waitall(divisor - 1, req + divisor - 1, MPI_STATUSES_IGNORE);
}
MPI_Barrier(MPI_COMM_WORLD);
{
Timer timer_sync(1, "synchronous");
MPI_Allgather(local_buffer.data(),
local_size,
MPI_DOUBLE,
global_buffer.data(),
local_size,
MPI_DOUBLE,
subcom);
solve(a.data(), b.data(), c.data(), m / divisor, n, k);
// usleep(1e6 * divisor * waiting_time);
}
MPI_Comm_free(&subcom);
MPI_Finalize();
}
================================================
FILE: benchmarks/dgemm_perf_model.cpp
================================================
#include
#include
#include
#include
#include
using namespace cosma;
// Squareness score of an (a, b) rectangle: 1.0 when a == b, decreasing
// towards 0 the more elongated it is (equivalent to min(a,b) / max(a,b)).
double sq_score(double a, double b) {
    double ratio = 1.0 * a / b;
    double inverse = 1.0 * b / a;
    double larger = std::max(ratio, inverse);
    return (ratio + inverse) / (2.0 * larger);
}
// Overall squareness score of an m x n x k multiplication: the product of
// the pairwise squareness of A (m x k), B (k x n) and C (m x n).
double score(double m, double n, double k) {
    return sq_score(m, k) * sq_score(k, n) * sq_score(m, n);
}
// Throughput of an m x n x k multiply: 2*m*n*k flops over `time`.
// With `time` in milliseconds (as measured by the callers in this file),
// the result is in Gflop/s.
double throughput(double m, double n, double k, double time) {
    double flops = m * n * k * 2;
    return flops / (1e6 * time);
}
// One benchmarked multiplication problem together with its measurements.
struct problem {
    int m;        // rows of A / C
    int n;        // columns of B / C
    int k;        // columns of A / rows of B
    double time;  // measured time (milliseconds, averaged over repetitions)
    double score; // squareness score of the problem shape
    double tps;   // achieved throughput

    problem() = default;
    problem(int m_, int n_, int k_, double time_, double score_, double tps_)
        : m(m_), n(n_), k(k_), time(time_), score(score_), tps(tps_) {}
};
int main(int argc, char **argv) {
std::vector a;
std::vector b;
std::vector c;
int min_m = 1000;
int min_n = 1000;
int min_k = 1000;
int max_m = 50000;
int max_n = 1000;
int max_k = 1000;
int step_m = 500;
int step_n = 500;
int step_k = 500;
int n_rep = 2;
auto ctx = cosma::make_context();
bool copy_c_back = true;
// run random dgemm in order to initialize it
for (int i = 0; i < n_rep; ++i) {
a = std::vector(min_m * min_m);
b = std::vector(min_m * min_m);
c = std::vector(min_m * min_m);
local_multiply(
ctx, a.data(), b.data(), c.data(), min_m, min_m, min_m, 1.0, 0.0, copy_c_back);
}
std::vector timings;
for (int m = min_m; m <= max_m; m += step_m) {
for (int n = min_n; n <= max_n; n += step_n) {
for (int k = min_k; k <= max_k; k += step_k) {
auto start = std::chrono::high_resolution_clock::now();
for (int rep = 0; rep < n_rep; ++rep) {
a = std::vector(m * k);
b = std::vector(k * n);
c = std::vector(m * n);
local_multiply(
ctx, a.data(), b.data(), c.data(), m, n, k, 1.0, 0.0, copy_c_back);
}
auto finish = std::chrono::high_resolution_clock::now();
auto time =
std::chrono::duration_cast(
finish - start)
.count();
time /= 1.0 * n_rep;
double mul_score = score(m, n, k);
double tps = throughput(m, n, k, time);
problem prob(m, n, k, time, mul_score, tps);
timings.push_back(prob);
}
}
}
std::sort(timings.begin(),
timings.end(),
[](const problem &lhs, const problem &rhs) {
return lhs.tps < rhs.tps;
});
for (auto &problem : timings) {
std::cout << problem.m << " " << problem.tps << " " << problem.score
<< std::endl;
// std::cout << "(" << problem.m << ", " << problem.n << ", " <<
// problem.k << "), tps = " << problem.tps << ", score = " <<
// problem.score << std::endl;
}
return 0;
}
================================================
FILE: benchmarks/gpu_gemm_cublas.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// Fills ptr[0..size) with uniformly distributed random values in [0, 10).
// The RNG state is static, so repeated calls advance a single stream.
// Fixes: restore the stripped template parameters; the original
// `uniform_real_distribution dist(10.0)` set the range [a=10, b=1), i.e.
// a > b, which is undefined behavior — the intended range is [0, 10).
template <typename T>
void fill_matrix(T* ptr, size_t size) {
    static std::random_device dev; // seed
    static std::mt19937 rng(dev()); // generator
    static std::uniform_real_distribution<T> dist(0.0, 10.0); // distribution
    for (size_t i = 0; i < size; ++i) {
        ptr[i] = T{dist(rng)};
    }
}
std::vector tiled_mm_dgemm(int n_iter, int m, int n, int k) {
auto gpu_ctx = gpu::make_context(2, 4000, 4000, 4000);
std::vector aa(m * k);
std::vector bb(k * n);
std::vector cc(m * n);
double *a = aa.data();
double *b = bb.data();
double *c = cc.data();
double alpha = 1.0;
double beta = 0.0;
std::vector times(n_iter);
for (int i = 0; i < n_iter; ++i) {
fill_matrix(a, aa.size());
fill_matrix(b, bb.size());
if (beta > 0) {
fill_matrix(c, cc.size());
}
// perform dgemm
auto start = std::chrono::steady_clock::now();
cosma::local_multiply(gpu_ctx.get(), a, b, c, m, n, k, alpha, beta);
auto end = std::chrono::steady_clock::now();
times[i] = std::chrono::duration_cast(end - start).count();
}
std::sort(times.begin(), times.end());
return times;
}
std::vector cublasXt_dgemm(int n_iter, int m, int n, int k) {
auto status=
cudaSetDevice(0);
gpu::check_runtime_status(status);
cublasXtHandle_t handle;
auto cublas_status = cublasXtCreate(&handle);
gpu::check_blas_status(cublas_status);
int devices[1] = {0};
cublasXtDeviceSelect(handle, 1, devices);
// cublasXtSetCpuRoutine(handle, CUBLASXT_GEMM, CUBLASXT_DOUBLE, (void*)(&dgemm_));
// cublasXtSetCpuRatio(handle, CUBLASXT_GEMM, CUBLASXT_DOUBLE, 0.2);
// cublasXtSetPinningMemMode(handle, CUBLASXT_PINNING_ENABLED);
// cublasXtSetBlockDim(handle, 4000);
std::vector aa(m * k);
std::vector bb(k * n);
std::vector cc(m * n);
double *a = aa.data();
double *b = bb.data();
double *c = cc.data();
double alpha = 1.0;
double beta = 0.0;
std::vector times(n_iter);
for (int i = 0; i < n_iter; ++i) {
fill_matrix(a, aa.size());
fill_matrix(b, bb.size());
if (beta > 0) {
fill_matrix(c, cc.size());
}
// perform dgemm
auto start = std::chrono::steady_clock::now();
auto status = cublasXtDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
m, n, k, &alpha, a, m, b, k, &beta, c, m);
gpu::check_blas_status(status);
cudaDeviceSynchronize();
auto end = std::chrono::steady_clock::now();
times[i] = std::chrono::duration_cast(end - start).count();
}
std::sort(times.begin(), times.end());
// finalization
if (handle)
cublasXtDestroy(handle);
return times;
}
/*
// cublasLt assumes device pointers
std::vector cublasLt_dgemm(int n_iter, int m, int n, int k) {
auto runtime_status=
cudaSetDevice(0);
gpu::check_runtime_status(runtime_status);
cublasLtHandle_t handle;
auto status = cublasLtCreate(&handle);
gpu::check_blas_status(status);
// int devices[1] = {0};
// cublasLtDeviceSelect(handle, 1, devices);
// std::vector aa(m * k);
// std::vector bb(k * n);
// std::vector cc(m * n);
double *a = gpu::malloc_device(m * k);
double *b = gpu::malloc_device(k * n);
double *c = gpu::malloc_device(m * n);
double alpha = 1.0;
double beta = 0.0;
std::size_t workspaceSize = 4000;
std::size_t workspaceSizeBytes = workspaceSize * sizeof(double);
auto workspace = gpu::malloc_device(workspaceSize);
auto transa = CUBLAS_OP_N;
auto transb = CUBLAS_OP_N;
cublasLtMatmulDesc_t operationDesc = nullptr;
cublasLtMatrixLayout_t Adesc = nullptr;
cublasLtMatrixLayout_t Bdesc = nullptr;
cublasLtMatrixLayout_t Cdesc = nullptr;
cublasLtMatmulPreference_t preference = nullptr;
int returnedResults = 0;
cublasLtMatmulHeuristicResult_t heuristicResult = {};
status = cublasLtMatmulDescCreate(&operationDesc, CUDA_R_64F);
gpu::check_blas_status(status);
status = cublasLtMatmulDescSetAttribute(operationDesc,
CUBLASLT_MATMUL_DESC_TRANSA, &transa, sizeof(transa));
gpu::check_blas_status(status);
status = cublasLtMatmulDescSetAttribute(operationDesc,
CUBLASLT_MATMUL_DESC_TRANSB, &transb, sizeof(transb));
gpu::check_blas_status(status);
status = cublasLtMatrixLayoutCreate(&Adesc, CUDA_R_64F, m, k, m);
gpu::check_blas_status(status);
status = cublasLtMatrixLayoutCreate(&Bdesc, CUDA_R_64F, k, n, k);
gpu::check_blas_status(status);
status = cublasLtMatrixLayoutCreate(&Cdesc, CUDA_R_64F, m, n, m);
gpu::check_blas_status(status);
std::cout << "Created matrix layouts." << std::endl;
status = cublasLtMatmulPreferenceCreate(&preference);
gpu::check_blas_status(status);
status = cublasLtMatmulPreferenceSetAttribute(
preference, CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
&workspaceSizeBytes, sizeof(workspaceSizeBytes));
gpu::check_blas_status(status);
std::cout << "Set up preferences." << std::endl;
status = cublasLtMatmulAlgoGetHeuristic(
handle, operationDesc, Adesc, Bdesc, Cdesc, Cdesc,
preference, 1, &heuristicResult, &returnedResults);
gpu::check_blas_status(status);
if (returnedResults == 0) {
std::cout << "No algorithm was returned." << std::endl;
status = CUBLAS_STATUS_NOT_SUPPORTED;
gpu::check_blas_status(status);
}
std::cout << "Chose the algorithm." << std::endl;
std::vector times(n_iter);
for (int i = 0; i < n_iter; ++i) {
// fill_matrix(a, m * k);
// fill_matrix(b, k * n);
// if (beta > 0) {
// fill_matrix(c, m * n);
// }
// perform dgemm
auto start = std::chrono::steady_clock::now();
status = cublasLtMatmul(handle,
operationDesc,
&alpha,
a,
Adesc,
b,
Bdesc,
&beta,
c,
Cdesc,
c,
Cdesc,
&heuristicResult.algo,
workspace,
workspaceSizeBytes,
0);
gpu::check_blas_status(status);
cudaDeviceSynchronize();
auto end = std::chrono::steady_clock::now();
times[i] = std::chrono::duration_cast(end - start).count();
}
std::sort(times.begin(), times.end());
// finalization
if (handle)
cublasLtDestroy(handle);
// Descriptors are no longer needed as all GPU work was already
// enqueued.
if (preference)
status = cublasLtMatmulPreferenceDestroy(preference);
if (Cdesc)
status = cublasLtMatrixLayoutDestroy(Cdesc);
if (Bdesc)
status = cublasLtMatrixLayoutDestroy(Bdesc);
if (Adesc)
status = cublasLtMatrixLayoutDestroy(Adesc);
if (operationDesc)
status = cublasLtMatmulDescDestroy(operationDesc);
gpu::check_blas_status(status);
return times;
}
*/
int main(int argc, char* argv[]) {
// std::vector dims = {500, 1000, 2000, 4000, 8000, 16000, 32000};
std::vector dims = {4000, 8000, 12000, 16000, 20000, 24000, 28000, 32000};
int n_iter = 2;
std::vector times(n_iter);
for (const int& dim : dims) {
std::cout << "Dimension = " << dim << std::endl;
/*
// cublasLt
times = cublasLt_dgemm(n_iter, dim, dim, dim);
std::cout << "cublasLt: ";
for (const auto& time : times) {
std::cout << time << ", ";
}
std::cout << std::endl;
*/
// cublasXt
times = cublasXt_dgemm(n_iter, dim, dim, dim);
std::cout << "cublasXt: ";
for (const auto& time : times) {
std::cout << time << ", ";
}
if (times.size()) {
std::cout << "highest throughtput [Glop/s]: " << 2.0*dim*dim*dim/(1e6*times[0]);
}
std::cout << std::endl;
// tiled-mm
times = tiled_mm_dgemm(n_iter, dim, dim, dim);
std::cout << "Tiled-MM: ";
for (const auto& time : times) {
std::cout << time << ", ";
}
if (times.size()) {
std::cout << "highest throughtput [Glop/s]: " << 2.0*dim*dim*dim/(1e6*times[0]);
}
std::cout << std::endl;
}
}
================================================
FILE: benchmarks/gpu_gemm_libsci_acc.cpp
================================================
#include
#include
#include
#include
// Runs one m x n x k dgemm through libsci_acc using pinned host allocations
// and returns the elapsed time in milliseconds (the caller prints "[ms]").
// NOTE(review): buffers are intentionally left uninitialized — this measures
// timing only, not numerical results.
long libsci_acc_dgemm(int m, int n, int k) {
    double* a, *b, *c;
    double alpha = 1.0;
    double beta = 0.0;
    libsci_acc_HostAlloc((void**)&a, sizeof(double)*m*k);
    libsci_acc_HostAlloc((void**)&b, sizeof(double)*k*n);
    libsci_acc_HostAlloc((void**)&c, sizeof(double)*m*n);
    // perform dgemm
    auto start = std::chrono::steady_clock::now();
    dgemm('n', 'n', m, n, k, alpha, a, m, b, k, beta, c, m);
    auto end = std::chrono::steady_clock::now();
    libsci_acc_HostFree(a);
    libsci_acc_HostFree(b);
    libsci_acc_HostFree(c);
    // restored stripped template argument: duration_cast<milliseconds>
    return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
}
// Driver: times libsci_acc dgemm for each dimension, excluding a warm-up run.
int main(int argc, char* argv[]) {
    // initialization
    libsci_acc_init();
    // std::vector<int> dims = {500, 1000, 2000, 4000, 8000, 16000, 32000};
    std::vector<int> dims = {32000};
    int n_iter = 1;
    for (const int& dim : dims) {
        std::cout << "Dimension = " << dim << std::endl;
        double t_avg_libsci = 0;
        // n_iter+1 runs total: iteration 0 is a warm-up and is excluded
        // from the average.
        for (int i = 0; i < n_iter+1; ++i) {
            long t_libsci = libsci_acc_dgemm(dim, dim, dim);
            if (i == 0) continue;
            t_avg_libsci += t_libsci;
        }
        std::cout << "libsci average time [ms]: " << 1.0*t_avg_libsci/n_iter << std::endl;
    }
    libsci_acc_finalize();
}
================================================
FILE: benchmarks/reduce-scatter.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace cosma;
int main( int argc, char **argv ) {
MPI_Init(&argc, &argv);
int P, rank;
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
const int n_rep = 2;
int scaling_factor = P;
/*
for (int i = -10; i <= 10; ++i) {
int dim = (scaling_factor+i)*P;
int block_size = (dim/P) * dim;
int total_size = block_size * P;
if (rank == 0)
std::cout << "dim = " << dim << std::endl;
std::vector in(total_size);
std::vector result(block_size);
MPI_Request reqs[2];
{
Timer time(n_rep, "MPI_Reduce_scatter_block");
for (int i = 0; i < n_rep; ++i) {
MPI_Ireduce_scatter_block(in.data(),
result.data(),
block_size/2,
MPI_DOUBLE,
MPI_SUM,
MPI_COMM_WORLD,
&reqs[0]);
MPI_Ireduce_scatter_block(in.data(),
result.data(),
block_size/2,
MPI_DOUBLE,
MPI_SUM,
MPI_COMM_WORLD,
&reqs[1]);
MPI_Waitall(2, &reqs[0], MPI_STATUSES_IGNORE);
}
}
}
*/
int dim = 17408;
int block_size = (dim/P) * dim;
int total_size = block_size * P;
std::vector in(total_size);
std::vector result(block_size);
{
Timer time(n_rep, "MPI_Reduce_scatter_block");
for (int i = 0; i < n_rep; ++i) {
MPI_Reduce_scatter_block(in.data(),
result.data(),
block_size,
MPI_DOUBLE,
MPI_SUM,
MPI_COMM_WORLD);
}
}
MPI_Finalize();
return 0;
}
================================================
FILE: benchmarks/run_ubench.sh
================================================
# Node counts to benchmark.
n_nodes_list=(2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36)
# Run the allgather micro-benchmark on $1 nodes, one rank per node.
run() {
n_nodes=$1
srun -N $n_nodes -n $n_nodes ./tests/ubench/ubench-allgather
}
# NOTE(review): this sets IFS to the empty string for the rest of the script,
# which disables word splitting of the unquoted $output expansions below —
# confirm that this (rather than IFS=$'\n') is the intended behavior.
IFS=
for n_nodes in ${n_nodes_list[@]}
do
echo "NODES = "$n_nodes
# Capture benchmark output and extract the reported average times for the
# irregular (MPI_Allgatherv) and padded (MPI_Allgather) variants.
output=$(run $n_nodes)
avg_time_v=$(echo $output | awk '/MPI_Allgatherv AVG TIME/ {print $5}')
avg_time=$(echo $output | awk '/MPI_Allgather AVG TIME/ {print $5}')
echo $output
echo "avg_time_v = "$avg_time_v
echo "avg_time = "$avg_time
# Append one value per node count so a full sweep builds two result files.
echo $avg_time_v >> "allgather_v.txt"
echo $avg_time >> "allgather.txt"
done
================================================
FILE: benchmarks/scalapack_transformer.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace grid2grid;
// Forward declarations for the Fortran/C BLACS and ScaLAPACK routines used
// by this benchmark; these are resolved at link time from the ScaLAPACK
// library.
extern "C" {
/* Cblacs declarations */
// process id and total number of BLACS processes
void Cblacs_pinfo(int*, int*);
// obtain a default system context
void Cblacs_get(int, int, int*);
// initialize a process grid on a context
void Cblacs_gridinit(int*, const char*, int, int);
// grid coordinates (row, col) of a process
void Cblacs_pcoord(int, int, int*, int*);
// release a context
void Cblacs_gridexit(int);
// synchronize all processes in a context
void Cblacs_barrier(int, const char*);
// number of local rows/cols owned under a block-cyclic distribution
int numroc_(int*, int*, int*, int*, int*);
// redistribute a block-cyclic matrix between two distributions
void pdgemr2d_(int *m, int *n,
double *a, int *ia, int *ja, int *desca,
double *b, int *ib, int *jb, int *descb,
int* ictxt);
// initialize a ScaLAPACK array descriptor
void descinit_(int* desc, int* m, int* n, int* bm, int* bn,
int* rsrc, int* csrc, int* ctxt, int* lda, int* info);
}
// *****************************
// OUR LAYOUT TRANSFORMER
// *****************************
// Times the grid2grid layout transformation between two block-cyclic
// distributions of an m x n matrix over a pm x pn process grid.
// Returns the minimum time over nrep repetitions, in milliseconds.
long int run_our_layout(int m, int n, int bm1, int bn1, int bm2, int bn2, int pm, int pn, int nrep, int rank) {
    auto ordering = scalapack::ordering::row_major;
    // deterministic element values so both layouts describe the same matrix
    auto values = [](int i, int j) {
        return cosma::math_utils::cantor_pairing(i, j);
    };
    scalapack::data_layout layout1({m, n}, {bm1, bn1}, {pm, pn}, ordering);
    std::vector<double> buffer1 = initialize_locally(rank, layout1, values);
    grid_layout scalapack_layout_1 = get_scalapack_grid(layout1, buffer1.data(), rank);
    scalapack::data_layout layout2({m, n}, {bm2, bn2}, {pm, pn}, ordering);
    std::vector<double> buffer2 = initialize_locally(rank, layout2, values);
    grid_layout scalapack_layout_2 = get_scalapack_grid(layout2, buffer2.data(), rank);
    long int min_time = std::numeric_limits<long int>::max();
    for (int i = 0; i < nrep; ++i) {
        // barriers before and after so all ranks time the full collective
        MPI_Barrier(MPI_COMM_WORLD);
        auto start = std::chrono::steady_clock::now();
        transform(scalapack_layout_1, scalapack_layout_2, MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);
        auto end = std::chrono::steady_clock::now();
        long int our_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
        min_time = std::min(our_time, min_time);
    }
    return min_time;
}
// *****************************
// SCALAPACK LAYOUT TRANSFORMER
// *****************************
long int run_scalapack_layout(int m, int n, int bm1, int bn1, int bm2, int bn2, int pm, int pn, int nrep, int rank) {
// Begin Cblas context
// We assume that we have 4 processes and place them in a 2-by-2 grid
int iZERO = 0;
int ctxt, myid, myrow, mycol, numproc;
int procrows = 2, proccols = 2;
Cblacs_pinfo(&myid, &numproc);
Cblacs_get(0, 0, &ctxt);
Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
Cblacs_pcoord(ctxt, myid, &myrow, &mycol);
// Number of rows and cols owned by the current process
int nrows1 = numroc_(&m, &bm1, &myrow, &iZERO, &procrows);
int ncols1 = numroc_(&n, &bn1, &mycol, &iZERO, &proccols);
int nrows2 = numroc_(&m, &bm2, &myrow, &iZERO, &procrows);
int ncols2 = numroc_(&n, &bn2, &mycol, &iZERO, &proccols);
std::vector buffer1(nrows1 * ncols1);
std::vector buffer2(nrows2 * ncols2);
int ia = 1;
int ja = 1;
int ib = 1;
int jb = 1;
// std::vector desca = {1, ctxt, m, n, bm1, bn1, 0, 0, m};
// std::vector descb = {1, ctxt, m, n, bm2, bn2, 0, 0, m};
std::array desc1;
std::array desc2;
int info;
descinit_(&desc1[0], &m, &n, &bm1, &bn1, &iZERO, &iZERO, &ctxt, &nrows1, &info);
descinit_(&desc2[0], &m, &n, &bm2, &bn2, &iZERO, &iZERO, &ctxt, &nrows2, &info);
long int min_time = std::numeric_limits::max();
for (int i = 0; i < nrep; ++i) {
MPI_Barrier(MPI_COMM_WORLD);
auto start = std::chrono::steady_clock::now();
pdgemr2d_(&m, &n, buffer1.data(), &ia, &ib, &desc1[0],
buffer2.data(), &ib, &jb, &desc2[0],
&ctxt);
MPI_Barrier(MPI_COMM_WORLD);
auto end = std::chrono::steady_clock::now();
auto scalapack_time = std::chrono::duration_cast(end - start).count();
min_time = std::min(min_time, scalapack_time);
}
// Release resources
Cblacs_gridexit(ctxt);
return min_time;
}
// Compares the grid2grid layout transformer against ScaLAPACK's pdgemr2d
// for increasing matrix sizes, printing minimum times and their ratio.
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int P, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &P);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    // base dimension and the two block-cyclic block sizes to convert between
    int dim = 10000;
    int bm1 = 124;
    int bn1 = 124;
    int pm = 2;
    int pn = 2;
    int bm2 = 192;
    int bn2 = 192;
    int nrep = 3;
    for (int i = 1; i <= 5; ++i) {
        int m = dim * i;
        int n = dim * i;
        auto our_time = run_our_layout(m, n, bm1, bn1, bm2, bn2, pm, pn, nrep, rank);
        auto scalapack_time = run_scalapack_layout(m, n, bm1, bn1, bm2, bn2, pm, pn, nrep, rank);
        if (rank == 0) {
            std::cout << "Dimension = " << m << std::endl;
            std::cout << "Our time [ms] = " << our_time << std::endl;
            std::cout << "ScaLAPACK time [ms] = " << scalapack_time << std::endl;
            // fixed typo in output message: "Ration" -> "Ratio"
            std::cout << "Ratio scalapack/our = " << 1.0 * scalapack_time/our_time << std::endl;
            std::cout << "============================" << std::endl;
        }
    }
    MPI_Finalize();
    return 0;
}
================================================
FILE: benchmarks/sendrecv.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace cosma;
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int P, rank;
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int base_size = 1 << 25;
int local_size = base_size;
std::vector in(local_size);
std::vector result(local_size);
const int n_rep = 10;
for (int i = 0; i < n_rep; ++i) {
int target = 1 - rank;
MPI_Sendrecv(in.data(),
local_size,
MPI_DOUBLE,
target,
0,
result.data(),
local_size,
MPI_DOUBLE,
target,
0,
MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
}
MPI_Finalize();
return 0;
}
================================================
FILE: benchmarks/transpose.cpp
================================================
#include
#include
#include
#include
#include
int main(int argc, char** argv) {
int n_rep = 3;
// dimensions before transposing
std::vector n_rows = {5000, 10000, 15000, 20000, 25000, 30000}; // 5000;
std::vector n_cols = {5000, 10000, 15000, 20000, 25000, 30000}; // 10000;
// not strided
auto src_stride = n_rows; // 5000;
auto dest_stride = n_cols; // 10000;
bool conjugate = false;
costa::memory::threads_workspace workspace(256);
std::vector g2g_times;
std::vector mkl_times;
for (int i = 0; i < n_rows.size(); ++i) {
long g2g_time = std::numeric_limits::max();
long mkl_time = std::numeric_limits::max();
src_stride[i] = std::max(n_rows[i], src_stride[i]);
// since transposed
dest_stride[i] = std::max(n_cols[i], dest_stride[i]);
std::vector src(src_stride[i] * n_cols[i]);
std::vector dest_g2g(dest_stride[i] * n_rows[i]);
std::vector dest_mkl(dest_stride[i] * n_rows[i]);
for (int row = 0; row < n_rows[i]; ++row) {
for (int col = 0; col < n_cols[i]; ++col) {
src[col * src_stride[i] + row] = col * src_stride[i] + row;
}
}
for (int rep = 0; rep < n_rep; ++rep) {
// ***********************************
// transpose with costa
// ***********************************
auto start = std::chrono::steady_clock::now();
costa::memory::copy_and_transpose(src.data(), n_rows[i], n_cols[i], src_stride[i],
dest_g2g.data(), dest_stride[i], false, workspace);
auto end = std::chrono::steady_clock::now();
g2g_time = std::min(g2g_time, (long) std::chrono::duration_cast(end - start).count());
// ***********************************
// transpose with mkl
// ***********************************
start = std::chrono::steady_clock::now();
mkl_domatcopy('C', 'T', n_rows[i], n_cols[i], 1.0, src.data(), src_stride[i], dest_mkl.data(), dest_stride[i]);
end = std::chrono::steady_clock::now();
mkl_time = std::min(mkl_time, (long) std::chrono::duration_cast(end - start).count());
}
g2g_times.push_back(g2g_time);
mkl_times.push_back(mkl_time);
// ***********************************
// checking results
// ***********************************
int n_rows_t = n_cols[i];
int n_cols_t = n_rows[i];
for (int row = 0; row < n_rows_t; ++row) {
for (int col = 0; col < n_cols_t; ++col) {
// dest_stride >= n_cols
auto g2g = dest_g2g[col * dest_stride[i] + row];
auto mkl = dest_mkl[col * dest_stride[i] + row];
auto target = src[row * src_stride[i] + col];
if (g2g != mkl) {
std::cout << "Error: (" << col << ", " << row << ") = " << ", g2g = " << g2g << ", mkl = " << mkl << ", target = " << target << std::endl;
}
}
}
}
// ***********************************
// output COSTA timings
// ***********************************
std::cout << "COSTA times: " << std::endl;
for (int i = 0; i < g2g_times.size(); ++i) {
std::cout << g2g_times[i] << ", ";
}
std::cout << std::endl;
// ***********************************
// output MKL timings
// ***********************************
std::cout << "mkl times: " << std::endl;
for (int i = 0; i < mkl_times.size(); ++i) {
std::cout << mkl_times[i] << ", ";
}
std::cout << std::endl;
return 0;
}
================================================
FILE: benchmarks/ubench-allgather.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace cosma;
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int P, rank;
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int base_size = 1500000;
int var = base_size / 10;
int local_size = base_size + ((rank % 2 == 0) ? var : 0);
int max_size = -1;
int total_size = 0;
std::vector sizes(P);
std::vector dspls(P);
for (int i = 0; i < P; ++i) {
int local_size = base_size + ((i % 2 == 0) ? var : 0);
max_size = std::max(max_size, local_size);
sizes[i] = local_size;
dspls[i] = total_size;
total_size += local_size;
}
std::vector in(local_size);
std::vector in_padded(max_size);
std::vector result(total_size);
std::vector result_padded(P * max_size);
const int n_rep = 30;
{
Timer time(n_rep, "MPI_Allgatherv");
for (int i = 0; i < n_rep; ++i) {
MPI_Allgatherv(in.data(),
local_size,
MPI_DOUBLE,
result.data(),
sizes.data(),
dspls.data(),
MPI_DOUBLE,
MPI_COMM_WORLD);
}
}
{
Timer time(n_rep, "MPI_Allgather");
for (int i = 0; i < n_rep; ++i) {
MPI_Allgather(in_padded.data(),
max_size,
MPI_DOUBLE,
result_padded.data(),
max_size,
MPI_DOUBLE,
MPI_COMM_WORLD);
}
}
MPI_Finalize();
return 0;
}
================================================
FILE: bors.toml
================================================
# Bors merge-bot configuration: require the GitLab CI pipeline status before
# merging, and delete branches once merged.
status = [
"ci/gitlab/%",
]
delete_merged_branches = true
================================================
FILE: ci/baseimage.cuda.Dockerfile
================================================
# Base CI image: Ubuntu + CMake + spack, with MPICH installed and a spack
# environment (/cosma-env-cuda) whose dependencies are pre-built so that CI
# builds of COSMA itself are fast.
FROM ubuntu:24.04 as builder
ARG CUDA_ARCH=90
ENV DEBIAN_FRONTEND noninteractive
ENV FORCE_UNSAFE_CONFIGURE 1
ENV PATH="/spack/bin:${PATH}"
ENV MPICH_VERSION=4.3.2
ENV CMAKE_VERSION=3.30.9
RUN apt-get -y update
RUN apt-get install -y apt-utils
# install basic tools
RUN apt-get install -y --no-install-recommends gcc g++ gfortran clang libomp-14-dev git make unzip file \
vim wget pkg-config python3-pip python3-dev cython3 python3-pythran tcl m4 cpio curl automake meson \
xz-utils patch patchelf apt-transport-https ca-certificates gnupg software-properties-common perl tar bzip2 \
liblzma-dev libbz2-dev
# install CMake
RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz -O cmake.tar.gz && \
tar zxvf cmake.tar.gz --strip-components=1 -C /usr
# get latest version of spack
RUN git clone -b releases/v1.1 https://github.com/spack/spack.git
# set the location of packages built by spack
RUN spack config add config:install_tree:root:/opt/local
# set cuda_arch for all packages
RUN spack config add packages:all:variants:cuda_arch=${CUDA_ARCH}
# add local repo for cosma and tiled-mm
COPY ./spack_repo /spack_repo
RUN spack repo add /spack_repo/cosma
# find all external packages
RUN spack external find --all --exclude python --exclude meson
# find compilers
RUN spack compiler find
# install MPICH
RUN spack install mpich@${MPICH_VERSION} %gcc
# for the MPI hook
RUN echo $(spack find --format='{prefix.lib}' mpich) > /etc/ld.so.conf.d/mpich.conf
RUN ldconfig
# # create environments for several configurations and install dependencies
# COSTA and Tiled-MM are set up as spack "develop" checkouts so build.Dockerfile
# can pull their latest master before building.
RUN spack env create -d /cosma-env-cuda && \
spack -e /cosma-env-cuda add "cosma@=master +cuda +tests +scalapack +shared %gcc ^mpich" && \
spack -e /cosma-env-cuda add "tiled-mm@=master" && \
spack -e /cosma-env-cuda develop -p "./tiled-mm" "tiled-mm" && \
spack -e /cosma-env-cuda add "costa@=master" && \
spack -e /cosma-env-cuda develop -p "./costa" "costa" && \
spack -e /cosma-env-cuda develop -p /src cosma@master
RUN spack -e /cosma-env-cuda install --only=dependencies --fail-fast
RUN spack clean -a
# Disabled alternative environments (gpu-direct, nccl, cpu-only) kept for
# reference below.
# RUN spack env create -d /cosma-env-cuda-gpu-direct && \
# spack -e /cosma-env-cuda-gpu-direct add "cosma@master +cuda +tests +scalapack +shared +gpu_direct %gcc ^mpich " && \
# spack -e /cosma-env-cuda-gpu-direct add "tiled-mm@master" && \
# spack -e /cosma-env-cuda-gpu-direct add "costa@master" && \
# spack -e /cosma-env-cuda-gpu-direct add "cuda@12" && \
# spack -e /cosma-env-cuda-gpu-direct develop -p /src cosma@master && \
# spack -e /cosma-env-cuda-gpu-direct install --only=dependencies --fail-fast
# RUN spack env create -d /cosma-env-cuda-nccl && \
# spack -e /cosma-env-cuda-nccl add "cosma@master +cuda +tests +scalapack +shared +nccl %gcc ^mpich " && \
# spack -e /cosma-env-cuda-nccl add "tiled-mm@2.3.1" && \
# spack -e /cosma-env-cuda-nccl add "costa@master" && \
# spack -e /cosma-env-cuda-nccl add "cuda@12" && \
# spack -e /cosma-env-cuda-nccl develop -p /src cosma@master && \
# spack -e /cosma-env-cuda-nccl install --only=dependencies --fail-fast
# RUN spack env create -d /cosma-env-cpu && \
# spack -e /cosma-env-cpu add "cosma@master ~cuda +tests +scalapack +shared %gcc ^mpich " && \
# spack -e /cosma-env-cpu add "costa@master" && \
# spack -e /cosma-env-cpu develop -p /src cosma@master && \
# spack -e /cosma-env-cpu install --only=dependencies --fail-fast
================================================
FILE: ci/build.Dockerfile
================================================
# CI build image: takes the pre-provisioned base image, copies the PR source
# into /src, and builds COSMA via the spack environment given by ENVPATH.
ARG BASE_IMAGE
FROM $BASE_IMAGE
ARG ENVPATH
# copy source files of the pull request into container
COPY . /src
# # show the spack's spec
RUN spack -e $ENVPATH find -lcdv
# build COSTA and Tiled-MM with current @master branch
RUN cd $ENVPATH/costa && git pull && git log --oneline -1 && \
cd $ENVPATH/tiled-mm && git pull && git log --oneline -1
# show the spack.yaml
RUN cat $ENVPATH/spack.yaml
# build packages
RUN spack -e $ENVPATH install
# we need a fixed name for the build directory
# here is a hacky workaround to link ./spack-build-{hash} to ./spack-build
RUN cd /src && ln -s $(spack -e $ENVPATH location -b cosma) spack-build
================================================
FILE: ci/cscs.yml
================================================
# CSCS GitLab CI pipeline: build the base image, build COSMA on top of it,
# then run the test binaries on the daint-gh200 cluster via SLURM.
include:
- remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
stages:
- baseimage
- build
- test
# Rebuilds only when the base Dockerfile changes (WATCH_FILECHANGES).
build base image:
extends: [.dynamic-image-name, .container-builder-cscs-gh200]
stage: baseimage
timeout: 2h
variables:
DOCKERFILE: ci/baseimage.cuda.Dockerfile
WATCH_FILECHANGES: ci/baseimage.cuda.Dockerfile
PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/base/cosma-ci
KUBERNETES_MEMORY_REQUEST: "92Gi"
KUBERNETES_MEMORY_LIMIT: "92Gi"
build cosma:
extends: .container-builder-cscs-gh200
needs: ["build base image"]
stage: build
variables:
CSCS_REBUILD_POLICY: "always"
DOCKERFILE: ci/build.Dockerfile
PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/cosma/cosma-ci:$CI_COMMIT_SHA
ENVPATH: "/cosma-env-cuda"
DOCKER_BUILD_ARGS: '["BASE_IMAGE=${BASE_IMAGE}", "ENVPATH=$ENVPATH"]'
# Shared template for all test jobs; individual jobs set node/rank counts.
.run_tests:
extends: [.container-runner-daint-gh200]
needs: ["build cosma"]
stage: test
image: $CSCS_REGISTRY_PATH/cosma/cosma-ci:$CI_COMMIT_SHA
variables:
GIT_STRATEGY: none
MPICH_MAX_THREAD_SAFETY: multiple
CSCS_REGISTRY_LOGIN: 'YES'
PULL_IMAGE: 'YES'
SLURM_HINT: nomultithread
SLURM_UNBUFFEREDIO: ''
SLURM_CPU_BIND: 'socket'
SLURM_MPI: "pmi2"
CRAY_CUDA_MPS: 'YES'
# SLURM_WAIT: 0
COSMA_GPU_MAX_TILE_K: 100
COSMA_GPU_MAX_TILE_M: 100
COSMA_GPU_MAX_TILE_N: 100
mapper:
extends: .run_tests
stage: test
script: /cosma-env-cuda/.spack-env/view/bin/test.mapper
variables:
SLURM_JOB_NUM_NODES: 1
SLURM_NTASKS: 1
USE_MPI: 'YES'
pdgemm:
extends: .run_tests
stage: test
script: /cosma-env-cuda/.spack-env/view/bin/test.pdgemm
variables:
SLURM_JOB_NUM_NODES: 2
SLURM_NTASKS: 16
USE_MPI: 'YES'
multiply:
extends: .run_tests
stage: test
script: /cosma-env-cuda/.spack-env/view/bin/test.multiply
variables:
SLURM_JOB_NUM_NODES: 2
SLURM_NTASKS: 16
USE_MPI: 'YES'
scalar_matmul:
extends: .run_tests
stage: test
script: /cosma-env-cuda/.spack-env/view/bin/test.scalar_matmul
variables:
SLURM_JOB_NUM_NODES: 1
SLURM_NTASKS: 8
USE_MPI: 'YES'
multiply_using_layout:
extends: .run_tests
stage: test
script: /cosma-env-cuda/.spack-env/view/bin/test.multiply_using_layout
variables:
SLURM_JOB_NUM_NODES: 1
SLURM_NTASKS: 4
================================================
FILE: ci/mps-wrapper.sh
================================================
#!/bin/bash
# Example mps-wrapper.sh usage:
# > srun --cpu-bind=socket [...] mps-wrapper.sh
export CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps
export CUDA_MPS_LOG_DIRECTORY=/tmp/nvidia-log
# Launch MPS from a single rank per node
if [ $SLURM_LOCALID -eq 0 ]; then
CUDA_VISIBLE_DEVICES=0,1,2,3 nvidia-cuda-mps-control -d
fi
# set cuda device
# Derive the NUMA node(s) this rank's CPU affinity mask maps to and expose
# only the matching GPU(s); memory is then bound to the same NUMA node(s).
numa_nodes=$(hwloc-calc --physical --intersect NUMAnode $(taskset -p $$ | awk '{print "0x"$6}'))
export CUDA_VISIBLE_DEVICES=$numa_nodes
# Run the command
exec numactl --membind=$numa_nodes "$@"
================================================
FILE: cmake/FindARMPL.cmake
================================================
# Copyright (c) 2022- ETH Zurich
#
# authors : Mathieu Taillefumier
include(FindPackageHandleStandardArgs)

# Candidate ARMPL installation prefixes: the ARMPL_ROOT CMake variable plus
# common environment variables.
set(_ARMPL_PATHS ${ARMPL_ROOT}
  $ENV{ARMPL_ROOT}
  $ENV{ARMPLROOT}
  $ENV{ARMPL_DIR}
  $ENV{ARMPLDIR}
  $ENV{ORNL_ARMPL_ROOT}
  $ENV{CRAY_ARMPL_ROOT})

# Locate every interface/threading flavour of the library.
foreach(_var armpl armpl_int64 armpl_ilp64 armpl_lp64 armpl_ilp64_mp armpl_lp64_mp)
  string(TOUPPER ${_var} _var_up)
  find_library("COSMA_${_var_up}_LINK_LIBRARIES" NAME ${_var} HINTS ${_ARMPL_PATHS} PATH_SUFFIXES "lib" "lib64" "armpl/lib" "armpl/lib64" "armpl")
endforeach()
find_path(COSMA_ARMPL_INCLUDE_DIRS NAMES "armpl.h" HINTS ${_ARMPL_PATHS} PATH_SUFFIXES "include" "armpl" "armpl/include" "include/armpl")

# Check for 64bit Integer support
if(COSMA_BLAS_INTERFACE MATCHES "64bits")
  set(COSMA_BLAS_armpl_LIB "ARMPL_ILP64")
else()
  set(COSMA_BLAS_armpl_LIB "ARMPL_LP64")
endif()

# Check for OpenMP support, VIA BLAS_VENDOR of Arm_mp or Arm_ipl64_mp
if(COSMA_BLAS_THREADING MATCHES "openmp")
  string(APPEND COSMA_BLAS_armpl_LIB "_MP")
endif()

# check if found
find_package_handle_standard_args(
  Armpl REQUIRED_VARS COSMA_ARMPL_INCLUDE_DIRS COSMA_ARMPL_LP64_LINK_LIBRARIES
  COSMA_ARMPL_LP64_MP_LINK_LIBRARIES COSMA_ARMPL_ILP64_LINK_LIBRARIES COSMA_ARMPL_ILP64_MP_LINK_LIBRARIES)

# add target to link against
if(NOT TARGET cosma::BLAS::ARMPL::armpl)
  add_library(cosma::BLAS::ARMPL::armpl INTERFACE IMPORTED)
  # now define an alias to the target library
  add_library(cosma::BLAS::ARMPL::blas ALIAS cosma::BLAS::ARMPL::armpl)
endif()

# we need to initialize the targets of each individual library only once.
foreach(_var armpl_ilp64 armpl_lp64 armpl_ilp64_mp armpl_lp64_mp)
  string(TOUPPER "${_var}" _var_up)
  if(NOT TARGET cosma::BLAS::ARMPL::${_var})
    add_library(cosma::BLAS::ARMPL::${_var} INTERFACE IMPORTED)
    set_property(TARGET cosma::BLAS::ARMPL::${_var} PROPERTY INTERFACE_INCLUDE_DIRECTORIES
      ${COSMA_ARMPL_INCLUDE_DIRS})
    set_property(TARGET cosma::BLAS::ARMPL::${_var} PROPERTY INTERFACE_LINK_LIBRARIES
      "${COSMA_${_var_up}_LINK_LIBRARIES}")
  endif()
endforeach()

# Point the umbrella target at the flavour selected above.
# NOTE(review): the previous version had an unmatched endif() after these
# set_property calls, which is a CMake syntax error; it has been removed.
set_property(TARGET cosma::BLAS::ARMPL::armpl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
  ${COSMA_ARMPL_INCLUDE_DIRS})
set_property(TARGET cosma::BLAS::ARMPL::armpl PROPERTY INTERFACE_LINK_LIBRARIES
  "${COSMA_${COSMA_BLAS_armpl_LIB}_LINK_LIBRARIES}")

set(COSMA_BLAS_VENDOR "ARMPL")
mark_as_advanced(COSMA_ARMPL_FOUND COSMA_BLAS_VENDOR COSMA_ARMPL_INCLUDE_DIRS)
================================================
FILE: cmake/FindATLAS.cmake
================================================
# Copyright (c) 2019 ETH Zurich
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#.rst:
# FindATLAS
# -----------
#
# This module tries to find the ATLAS library.
#
# The following variables are set
#
# ::
#
# ATLAS_FOUND - True if atlas is found
# ATLAS_LIBRARIES - The required libraries
# ATLAS_INCLUDE_DIRS - The required include directory
#
# The following import target is created
#
# ::
#
# ATLAS::atlas
#set paths to look for library from ROOT variables.If new policy is set, find_library() automatically uses them.
# if(NOT POLICY CMP0074)
# Candidate ATLAS installation prefixes: the ATLAS_ROOT CMake variable plus
# common environment variables.
set(_ATLAS_PATHS ${ATLAS_ROOT}
  $ENV{ATLAS_ROOT}
  $ENV{ATLASROOT}
  $ENV{ATLAS_DIR}
  $ENV{ATLASDIR})
# endif()
find_library(
  COSMA_ATLAS_LINK_LIBRARIES
  NAMES "atlas"
  HINTS ${_ATLAS_PATHS}
  PATH_SUFFIXES "atlas/lib" "atlas/lib64" "atlas"
)
find_path(
  COSMA_ATLAS_INCLUDE_DIRS
  NAMES "cblas-atlas.h" "cblas_atlas.h" "cblas.h"
  HINTS ${_ATLAS_PATHS}
  PATH_SUFFIXES "atlas" "atlas/include" "include/atlas"
)
# check if found
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ATLAS REQUIRED_VARS COSMA_ATLAS_INCLUDE_DIRS COSMA_ATLAS_LINK_LIBRARIES)
# add target to link against
if(NOT TARGET cosma::BLAS::ATLAS::atlas)
  add_library(cosma::BLAS::ATLAS::atlas INTERFACE IMPORTED)
  add_library(cosma::BLAS::ATLAS::blas ALIAS cosma::BLAS::ATLAS::atlas)
endif()
set_property(TARGET cosma::BLAS::ATLAS::atlas PROPERTY INTERFACE_LINK_LIBRARIES ${COSMA_ATLAS_LINK_LIBRARIES})
set_property(TARGET cosma::BLAS::ATLAS::atlas PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${COSMA_ATLAS_INCLUDE_DIRS})
# prevent clutter in cache: mark the cache entries this module actually
# creates (the previously listed ATLAS_LIBRARIES / ATLAS_INCLUDE_DIRS are
# never set by this module, so marking them had no effect).
mark_as_advanced(COSMA_ATLAS_LINK_LIBRARIES COSMA_ATLAS_INCLUDE_DIRS)
================================================
FILE: cmake/FindBLIS.cmake
================================================
# Copyright (c) 2019 ETH Zurich
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#.rst:
# FindBLIS
# -----------
#
# This module tries to find the BLIS library.
#
# The following variables are set
#
# ::
#
# BLIS_FOUND - True if blis is found
# BLIS_LIBRARIES - The required libraries
# BLIS_INCLUDE_DIRS - The required include directory
#
# The following import target is created
#
# ::
#
# BLIS::blis
# Set paths to look for the library from ROOT variables. If the new policy (CMP0074) is set, find_library() automatically uses them.
# if(NOT POLICY CMP0074)
# Candidate installation prefixes, taken from the BLIS_ROOT cache variable
# and the usual environment variables.
set(_BLIS_PATHS
    ${BLIS_ROOT}
    $ENV{BLIS_ROOT}
    $ENV{BLISROOT}
    $ENV{BLIS_DIR}
    $ENV{BLISDIR})

# The BLIS library itself, its native header, and a CBLAS-compatible header.
find_library(
  COSMA_BLIS_LINK_LIBRARIES
  NAMES "blis"
  HINTS ${_BLIS_PATHS}
  PATH_SUFFIXES "lib" "lib64" "blis/lib" "blis/lib64" "blis")

find_path(
  COSMA_BLIS_INCLUDE_DIRS
  NAMES "blis.h"
  HINTS ${_BLIS_PATHS}
  PATH_SUFFIXES "include" "blis" "blis/include" "include/blis")

find_path(
  COSMA_BLIS_CBLAS_INCLUDE_DIRS
  NAMES "cblas_blis.h" "cblas-blis.h" "cblas.h"
  HINTS ${_BLIS_PATHS}
  PATH_SUFFIXES "include" "blis" "blis/include" "include/blis")

# Handle QUIET/REQUIRED and set BLIS_FOUND.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  BLIS REQUIRED_VARS COSMA_BLIS_INCLUDE_DIRS COSMA_BLIS_LINK_LIBRARIES
                     COSMA_BLIS_CBLAS_INCLUDE_DIRS)

# Imported interface target, plus the generic ::blas alias consumed by
# the FindBlas dispatcher.
if(NOT TARGET cosma::BLAS::BLIS::blis)
  add_library(cosma::BLAS::BLIS::blis INTERFACE IMPORTED)
  add_library(cosma::BLAS::BLIS::blas ALIAS cosma::BLAS::BLIS::blis)
endif()
set_target_properties(
  cosma::BLAS::BLIS::blis
  PROPERTIES INTERFACE_LINK_LIBRARIES "${COSMA_BLIS_LINK_LIBRARIES}"
             INTERFACE_INCLUDE_DIRECTORIES
             "${COSMA_BLIS_INCLUDE_DIRS};${COSMA_BLIS_CBLAS_INCLUDE_DIRS}")

# prevent clutter in cache
mark_as_advanced(BLIS_FOUND COSMA_BLIS_LINK_LIBRARIES COSMA_BLIS_INCLUDE_DIRS
                 COSMA_BLIS_CBLAS_INCLUDE_DIRS)
================================================
FILE: cmake/FindBlas.cmake
================================================
# Copyright (c) 2022- ETH Zurich
#
# authors : Mathieu Taillefumier
include(FindPackageHandleStandardArgs)

# BLAS needs at least one compiled language to be linked against.
if(NOT
   (CMAKE_C_COMPILER_LOADED
    OR CMAKE_CXX_COMPILER_LOADED
    OR CMAKE_Fortran_COMPILER_LOADED))
  message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.")
endif()

# Known vendors. "CUSTOM" is an alias for GenericBLAS: the dispatch branch
# below always handled it, but the value used to be rejected by this
# validation, making that branch unreachable.
set(COSMA_BLAS_VENDOR_LIST
    "auto"
    "MKL"
    "OPENBLAS"
    "FLEXIBLAS"
    "ARMPL"
    "GenericBLAS"
    "CUSTOM"
    "CRAY_LIBSCI"
    "BLIS"
    "ATLAS"
    "NVPL"
    "OFF")

# COSMA_BLAS_VENDOR should normally be defined here but cosma defines it in the
# main CMakeLists.txt to keep the old behavior. the threading and integer
# interface can also be controlled but are fixed to the default values that
# COSMA was configured before introducing this module. So if findBLAS.cmake is
# to be used elsewhere, it is better to look at what CP2K does and start from
# there
if(NOT ${COSMA_BLAS_VENDOR} IN_LIST COSMA_BLAS_VENDOR_LIST)
  message(FATAL_ERROR "Invalid Host BLAS backend")
endif()

set(COSMA_BLAS_THREAD_LIST "sequential" "thread" "gnu-thread" "intel-thread"
    "tbb-thread" "openmp")
set(COSMA_BLAS_THREADING
    "openmp"
    CACHE STRING "threaded blas library")
set_property(CACHE COSMA_BLAS_THREADING PROPERTY STRINGS
             ${COSMA_BLAS_THREAD_LIST})
if(NOT ${COSMA_BLAS_THREADING} IN_LIST COSMA_BLAS_THREAD_LIST)
  message(FATAL_ERROR "Invalid threaded BLAS backend")
endif()

set(COSMA_BLAS_INTERFACE_BITS_LIST "32bits" "64bits")
set(COSMA_BLAS_INTERFACE
    "32bits"
    CACHE STRING
          "32 bits integers are used for indices, matrices and vectors sizes")
set_property(CACHE COSMA_BLAS_INTERFACE
             PROPERTY STRINGS ${COSMA_BLAS_INTERFACE_BITS_LIST})
if(NOT ${COSMA_BLAS_INTERFACE} IN_LIST COSMA_BLAS_INTERFACE_BITS_LIST)
  message(
    FATAL_ERROR
      "Invalid parameters. Blas and lapack can exist in two flavors 32 or 64 bits interfaces (relevant mostly for mkl)"
  )
endif()

# Nothing to do when the host BLAS search is disabled.
if(COSMA_BLAS_VENDOR MATCHES "OFF")
  return()
endif()

set(COSMA_BLAS_FOUND FALSE)

# first check for a specific implementation if requested
if(NOT COSMA_BLAS_VENDOR MATCHES "auto")
  if(COSMA_BLAS_VENDOR MATCHES "CUSTOM")
    # CUSTOM is implemented by the GenericBLAS module, so the created target
    # is named after GenericBLAS, not CUSTOM.
    find_package(GenericBLAS REQUIRED)
    set(_cosma_blas_pkg "GenericBLAS")
  else()
    find_package(${COSMA_BLAS_VENDOR} REQUIRED)
    set(_cosma_blas_pkg "${COSMA_BLAS_VENDOR}")
  endif()
  if(TARGET cosma::BLAS::${_cosma_blas_pkg}::blas)
    get_target_property(COSMA_BLAS_INCLUDE_DIRS cosma::BLAS::${_cosma_blas_pkg}::blas
                        INTERFACE_INCLUDE_DIRECTORIES)
    get_target_property(COSMA_BLAS_LINK_LIBRARIES cosma::BLAS::${_cosma_blas_pkg}::blas
                        INTERFACE_LINK_LIBRARIES)
    set(COSMA_BLAS_FOUND TRUE)
  endif()
  unset(_cosma_blas_pkg)
else()
  # search for any blas implementation and exit immediately if one is found
  foreach(_libs ${COSMA_BLAS_VENDOR_LIST})
    # Skip the pseudo-entries: "auto" and "OFF" are modes, not find modules
    # (the old code ended up calling find_package(OFF)), and "CUSTOM" is an
    # alias for GenericBLAS, which is probed anyway.
    if(NOT _libs MATCHES "^(auto|OFF|CUSTOM)$")
      find_package(${_libs})
      if(TARGET cosma::BLAS::${_libs}::blas)
        get_target_property(COSMA_BLAS_INCLUDE_DIRS cosma::BLAS::${_libs}::blas
                            INTERFACE_INCLUDE_DIRECTORIES)
        get_target_property(COSMA_BLAS_LINK_LIBRARIES cosma::BLAS::${_libs}::blas
                            INTERFACE_LINK_LIBRARIES)
        set(COSMA_BLAS_VENDOR "${_libs}")
        set(COSMA_BLAS_FOUND TRUE)
        break()
      endif()
    endif()
  endforeach()
endif()

# Some vendors (e.g. CRAY_LIBSCI, see FindCRAY_LIBSCI.cmake) expose no include
# directories; only require them when they exist. (The previous debug
# message(WARNING ...) in the else branch has been removed.)
if(COSMA_BLAS_INCLUDE_DIRS)
  find_package_handle_standard_args(
    Blas REQUIRED_VARS COSMA_BLAS_LINK_LIBRARIES COSMA_BLAS_INCLUDE_DIRS
                       COSMA_BLAS_VENDOR)
else()
  find_package_handle_standard_args(
    Blas REQUIRED_VARS COSMA_BLAS_LINK_LIBRARIES COSMA_BLAS_VENDOR)
endif()

# Generic imported target the rest of COSMA links against.
if(NOT TARGET cosma::BLAS::blas)
  add_library(cosma::BLAS::blas INTERFACE IMPORTED)
endif()
set_target_properties(cosma::BLAS::blas PROPERTIES INTERFACE_LINK_LIBRARIES
                      "${COSMA_BLAS_LINK_LIBRARIES}")
if(COSMA_BLAS_INCLUDE_DIRS)
  set_target_properties(cosma::BLAS::blas PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
                        "${COSMA_BLAS_INCLUDE_DIRS}")
endif()

mark_as_advanced(COSMA_BLAS_INCLUDE_DIRS)
mark_as_advanced(COSMA_BLAS_LINK_LIBRARIES)
mark_as_advanced(COSMA_BLAS_VENDOR)
mark_as_advanced(COSMA_BLAS_FOUND)
================================================
FILE: cmake/FindCRAY_LIBSCI.cmake
================================================
# Locate Cray LibSci, the BLAS/LAPACK/ScaLAPACK bundle of the Cray PE.
include(FindPackageHandleStandardArgs)

# Library names assume the GNU programming environment; the accelerated
# variant (sci_acc) is preferred when present.
set(_sciname "sci_gnu_mpi_mp")
set(_sciname_acc "sci_acc_gnu_nv60")

find_library(COSMA_CRAY_LIBSCI_LIBRARIES
  NAMES ${_sciname_acc} ${_sciname}
  HINTS
  ${_SCALAPACK_LIBRARY_DIRS}
  ENV CRAY_LIBSCI_PREFIX_DIR
  ENV CRAY_PE_LIBSCI_PREFIX_DIR
  ENV CRAY_LIBSCI_ACC_PREFIX_DIR
  ENV CRAY_PE_LIBSCI_ACC_PREFIX_DIR
  PATH_SUFFIXES lib
  DOC "Path to the Cray-libsci library.")

# FIX: use STATUS so this prints as a regular progress line instead of the
# "important notice" channel (stderr) a bare message() uses.
message(STATUS "CRAY_LIBSCI: ${COSMA_CRAY_LIBSCI_LIBRARIES}")

find_package_handle_standard_args(CRAY_LIBSCI DEFAULT_MSG COSMA_CRAY_LIBSCI_LIBRARIES)

# LibSci ships BLAS and ScaLAPACK in the same library, so both the blas and
# the scalapack_link interfaces point at the same file. No include
# directories are exported by this module.
if (NOT TARGET cosma::BLAS::CRAY_LIBSCI::sci)
  add_library(cosma::BLAS::CRAY_LIBSCI::sci INTERFACE IMPORTED)
  set_target_properties(cosma::BLAS::CRAY_LIBSCI::sci
    PROPERTIES INTERFACE_LINK_LIBRARIES "${COSMA_CRAY_LIBSCI_LIBRARIES}")
  add_library(cosma::BLAS::CRAY_LIBSCI::blas ALIAS cosma::BLAS::CRAY_LIBSCI::sci)
  add_library(cosma::BLAS::CRAY_LIBSCI::scalapack_link INTERFACE IMPORTED)
  set_target_properties(cosma::BLAS::CRAY_LIBSCI::scalapack_link
    PROPERTIES INTERFACE_LINK_LIBRARIES "${COSMA_CRAY_LIBSCI_LIBRARIES}")
endif()
================================================
FILE: cmake/FindFLEXIBLAS.cmake
================================================
# Copyright (c) 2022- ETH Zurich
#
# authors : Mathieu Taillefumier
# Locate FlexiBLAS (via pkg-config) and expose it as cosma::BLAS::FLEXIBLAS::*.
include(FindPackageHandleStandardArgs)

# NOTE(review): these prefixes are collected but never passed to the
# pkg-config search below; they only take effect if the user also extends
# PKG_CONFIG_PATH — confirm whether they should be wired in.
set(_FLEXIBLAS_PATHS
    ${FLEXIBLAS_ROOT}
    $ENV{FLEXIBLAS_ROOT}
    $ENV{FLEXIBLASROOT}
    $ENV{FLEXIBLAS_DIR}
    $ENV{FLEXIBLASDIR}
    $ENV{ORNL_FLEXIBLAS_ROOT}
    $ENV{CRAY_FLEXIBLAS_ROOT})

# try first with pkg-config
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
  pkg_check_modules(COSMA_FLEXIBLAS IMPORTED_TARGET GLOBAL flexiblas)
endif()

find_package_handle_standard_args(
  FLEXIBLAS DEFAULT_MSG COSMA_FLEXIBLAS_INCLUDE_DIRS
  COSMA_FLEXIBLAS_LINK_LIBRARIES)

if(COSMA_FLEXIBLAS_FOUND)
  # FIX: use the canonical upper-case vendor name. FindBlas.cmake derives
  # target names as cosma::BLAS::${COSMA_BLAS_VENDOR}::blas, so the previous
  # value "FlexiBLAS" made that lookup fail.
  set(COSMA_BLAS_VENDOR "FLEXIBLAS")
  if(NOT TARGET cosma::BLAS::FLEXIBLAS::flexiblas)
    add_library(cosma::BLAS::FLEXIBLAS::flexiblas INTERFACE IMPORTED)
    add_library(cosma::BLAS::FLEXIBLAS::blas ALIAS cosma::BLAS::FLEXIBLAS::flexiblas)
  endif()
  set_target_properties(
    cosma::BLAS::FLEXIBLAS::flexiblas PROPERTIES INTERFACE_LINK_LIBRARIES
    "${COSMA_FLEXIBLAS_LINK_LIBRARIES}")
  if(COSMA_FLEXIBLAS_INCLUDE_DIRS)
    set_target_properties(
      cosma::BLAS::FLEXIBLAS::flexiblas PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
      "${COSMA_FLEXIBLAS_INCLUDE_DIRS}")
  endif()
endif()

mark_as_advanced(COSMA_FLEXIBLAS_FOUND COSMA_FLEXIBLAS_INCLUDE_DIRS
                 COSMA_FLEXIBLAS_LINK_LIBRARIES COSMA_BLAS_VENDOR)
================================================
FILE: cmake/FindGenericBLAS.cmake
================================================
# Copyright (c) 2022- ETH Zurich
#
# authors : Mathieu Taillefumier
# Locate a generic (reference) BLAS library plus an optional separate CBLAS.
include(FindPackageHandleStandardArgs)

if(NOT POLICY CMP0074)
  # Without CMP0074, <Pkg>_ROOT variables are not honoured automatically,
  # so collect the candidate prefixes by hand.
  set(_GenericBLAS_PATHS ${GenericBLAS_ROOT} $ENV{GenericBLAS_ROOT})
endif()

find_library(
  COSMA_GenericBLAS_LINK_LIBRARIES
  NAMES "blas"
  HINTS ${_GenericBLAS_PATHS})
find_library(
  # optionally look for a separate cblas library - not required
  COSMA_GenericBLAS_CBLAS_LIBRARIES
  NAMES "cblas"
  HINTS ${_GenericBLAS_PATHS})
find_path(
  COSMA_GenericBLAS_INCLUDE_DIRS
  NAMES "cblas.h"
  HINTS ${_GenericBLAS_PATHS})

# Headers are optional: only list them as required when they were found.
if(COSMA_GenericBLAS_INCLUDE_DIRS)
  find_package_handle_standard_args(
    GenericBLAS REQUIRED_VARS COSMA_GenericBLAS_INCLUDE_DIRS
                              COSMA_GenericBLAS_LINK_LIBRARIES)
else()
  find_package_handle_standard_args(GenericBLAS
    REQUIRED_VARS COSMA_GenericBLAS_LINK_LIBRARIES)
endif()

# FIX: the original tested and appended misspelled variable names
# (COSMA_GenericBLAS_CBLAS_LINK_LIBRARIES / GenericBLAS_LINK_LIBRARIES),
# so a separately-found cblas library was never linked. It also contained a
# stray unmatched endif() that broke parsing of this module.
if(COSMA_GenericBLAS_CBLAS_LIBRARIES)
  list(APPEND COSMA_GenericBLAS_LINK_LIBRARIES ${COSMA_GenericBLAS_CBLAS_LIBRARIES})
endif()

# add target to link against
if(NOT TARGET cosma::BLAS::GenericBLAS::blas)
  add_library(cosma::BLAS::GenericBLAS::blas INTERFACE IMPORTED)
endif()
set_property(TARGET cosma::BLAS::GenericBLAS::blas
  PROPERTY INTERFACE_LINK_LIBRARIES ${COSMA_GenericBLAS_LINK_LIBRARIES})
set_property(TARGET cosma::BLAS::GenericBLAS::blas
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${COSMA_GenericBLAS_INCLUDE_DIRS})

# prevent clutter in cache
mark_as_advanced(COSMA_GenericBLAS_FOUND COSMA_GenericBLAS_LINK_LIBRARIES
                 COSMA_GenericBLAS_INCLUDE_DIRS COSMA_GenericBLAS_CBLAS_LIBRARIES)
================================================
FILE: cmake/FindMKL.cmake
================================================
#
# CMake recipes https://github.com/eth-cscs/cmake-recipes
#
# Copyright (c) 2018-2019, ETH Zurich BSD 3-Clause License. All rights reserved.
#
# Author: Teodor Nikolov (teodor.nikolov22@gmail.com)
#
#[=======================================================================[.rst:
FindMKL
-------
The following conventions are used:
intel / INTEL - Bindings for everything except GNU Fortran
gf / GF - GNU Fortran bindings
seq / SEQ - sequential MKL
omp / OMP - threaded MKL with OpenMP back end
tbb / TBB - threaded MKL with TBB back end
32bit / 32BIT - MKL 32 bit integer interface (used most often)
64bit / 64BIT - MKL 64 bit integer interface
mpich / MPICH - MPICH / IntelMPI BLACS back end
ompi / OMPI - OpenMPI BLACS back end
st / ST - static libraries
dyn / DYN - dynamic libraries
The module attempts to define a target for each MKL configuration. The
configuration will not be available if there are missing library files or a
missing dependency.
MKL Link line advisor:
https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor
Note: Mixing GCC and Intel OpenMP backends is a bad idea.
Search variables
^^^^^^^^^^^^^^^^
``MKLROOT``
Environment variable set to MKL's root directory
``MKL_ROOT``
CMake variable set to MKL's root directory
Example usage
^^^^^^^^^^^^^
To Find MKL:
find_package(MKL REQUIRED)
To check if target is available:
if (TARGET MKL::scalapack_mpich_intel_32bit_omp_dyn)
...
endif()
To link to an available target (see list below):
target_link_libraries(... MKL::scalapack_mpich_intel_32bit_omp_dyn)
Note: dependencies are handled for you (MPI, OpenMP, ...)
the target MKL::blas, MKL::MKL, MKL::lapack also include all necessary libraries
for linking.
MKL::MKL is also used by the cmake module provided by intel.
MKL::scalapack_link gives all libraries needed for scalapack.
Imported targets
^^^^^^^^^^^^^^^^
MKL (BLAS, LAPACK, FFT) targets:
MKL::[gf|intel]_[32bit|64bit]_[seq|omp|tbb]_[st|dyn] e.g.
MKL::mkl_intel_32bit_omp_dyn
BLACS targets:
MKL::blacs_[mpich|ompi]_[gf|intel]_[32bit|64bit]_[seq|omp|tbb]_[st|dyn] e.g.
MKL::blacs_intel_mpich_32bit_seq_st
ScaLAPACK targets:
MKL::scalapack_[mpich|ompi]_[gf|intel]_[32bit|64bit]_[seq|omp|tbb]_[st|dyn] e.g.
MKL::scalapack_mpich_intel_64bit_omp_dyn
Result variables
^^^^^^^^^^^^^^^^
MKL_FOUND
Not supported
^^^^^^^^^^^^^
- F95 interfaces
#]=======================================================================]
# Copyright (c) 2022- ETH Zurich
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(FindPackageHandleStandardArgs)

# MKL is linked differently depending on language bindings; at least one
# compiled language must be enabled.
if(NOT
   (CMAKE_C_COMPILER_LOADED
    OR CMAKE_CXX_COMPILER_LOADED
    OR CMAKE_Fortran_COMPILER_LOADED))
  message(FATAL_ERROR "FindMKL requires Fortran, C, or C++ to be enabled.")
endif()

# Dependencies
#
enable_language(Fortran)
enable_language(C)
find_package(Threads)
find_package(MPI COMPONENTS CXX C Fortran)
find_package(OpenMP COMPONENTS CXX C Fortran)

# If MKL_ROOT is not set, set it via the env variable MKLROOT.
#
if(NOT DEFINED MKL_ROOT)
  set(MKL_ROOT
      $ENV{MKLROOT}
      CACHE PATH "MKL's root directory.")
endif()

# Determine MKL's library folder
#
set(_mkl_libpath_suffix "intel64")
if(CMAKE_SIZEOF_VOID_P EQUAL 4) # 32 bit
  set(_mkl_libpath_suffix "ia32")
endif()

# Per-platform file naming. Both the plain arch directory and the
# OS-suffixed one (<arch>_win/_mac/_lin) are added to the search list,
# since different MKL releases use either layout.
if(WIN32)
  list(APPEND _mkl_libpath_suffix_list ${_mkl_libpath_suffix})
  string(APPEND _mkl_libpath_suffix "_win")
  list(APPEND _mkl_libpath_suffix_list ${_mkl_libpath_suffix})
  set(_mkl_libname_prefix "")
  set(_mkl_shared_lib "_dll.lib")
  set(_mkl_static_lib ".lib")
elseif(APPLE)
  list(APPEND _mkl_libpath_suffix_list ${_mkl_libpath_suffix})
  string(APPEND _mkl_libpath_suffix "_mac")
  list(APPEND _mkl_libpath_suffix_list ${_mkl_libpath_suffix})
  set(_mkl_libname_prefix "lib")
  set(_mkl_shared_lib ".dylib")
  set(_mkl_static_lib ".a")
else() # LINUX
  list(APPEND _mkl_libpath_suffix_list ${_mkl_libpath_suffix})
  string(APPEND _mkl_libpath_suffix "_lin")
  list(APPEND _mkl_libpath_suffix_list ${_mkl_libpath_suffix})
  set(_mkl_libname_prefix "lib")
  set(_mkl_shared_lib ".so")
  set(_mkl_static_lib ".a")
endif()

set(_mkl_search_paths "${MKL_ROOT}" "${MKL_ROOT}/lib" "${MKL_ROOT}/mkl/lib"
    "${MKL_ROOT}/compiler/lib")

# Functions: finds both static and shared MKL libraries
#
# Look up both linkage flavours of one MKL library and cache the results as
# <_varname>_DYN (shared) and <_varname>_ST (static). Search locations and
# file naming come from the _mkl_* variables computed above.
function(__mkl_find_library _varname _libname)
  set(_ext_DYN "${_mkl_shared_lib}")
  set(_ext_ST "${_mkl_static_lib}")
  foreach(_linkage DYN ST)
    find_library(
      ${_varname}_${_linkage}
      NAMES ${_mkl_libname_prefix}${_libname}${_ext_${_linkage}}
      HINTS ${_mkl_search_paths}
      PATH_SUFFIXES ${_mkl_libpath_suffix_list})
    mark_as_advanced(${_varname}_${_linkage})
  endforeach()
endfunction()
# Find MKL headers
#
find_path(COSMA_MKL_INCLUDE_DIRS mkl.h HINTS ${MKL_ROOT}/include
          ${MKL_ROOT}/mkl/include)
mark_as_advanced(COSMA_MKL_INCLUDE_DIRS)

# Group flags for static libraries on Linux (GNU, PGI, ICC -> same linker)
#
if(UNIX AND NOT APPLE)
  set(_mkl_linker_pre_flags_ST "-Wl,--start-group")
  set(_mkl_linker_post_flags_ST "-Wl,--end-group")
endif()

# Core MKL
#
__mkl_find_library(MKL_CORE_LIB mkl_core)

# Interface
#
__mkl_find_library(MKL_INTERFACE_INTEL_32BIT_LIB mkl_intel_lp64)
__mkl_find_library(MKL_INTERFACE_INTEL_64BIT_LIB mkl_intel_ilp64)
# The gf (GNU Fortran) bindings are only searched when compiling Fortran
# with GCC, and never on macOS.
if(NOT APPLE
   AND CMAKE_Fortran_COMPILER_LOADED
   AND CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
  __mkl_find_library(MKL_INTERFACE_GF_32BIT_LIB mkl_gf_lp64)
  __mkl_find_library(MKL_INTERFACE_GF_64BIT_LIB mkl_gf_ilp64)
endif()

# Threading: sequential always; the OpenMP layer matches the compiler family.
#
__mkl_find_library(MKL_SEQ_LIB mkl_sequential)
if(NOT APPLE
   AND (CMAKE_C_COMPILER_ID STREQUAL "GNU"
        OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
        OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU"))
  __mkl_find_library(MKL_OMP_LIB mkl_gnu_thread)
else()
  __mkl_find_library(MKL_OMP_LIB mkl_intel_thread)
endif()
__mkl_find_library(MKL_TBB_LIB mkl_tbb_thread)

# BLACS: the "MPICH" flavour maps to mkl_blacs_mpich on macOS and to the
# intelmpi variant elsewhere; the Open MPI flavour is separate.
#
if(APPLE)
  __mkl_find_library(MKL_BLACS_MPICH_32BIT_LIB mkl_blacs_mpich_lp64)
  __mkl_find_library(MKL_BLACS_MPICH_64BIT_LIB mkl_blacs_mpich_ilp64)
else()
  __mkl_find_library(MKL_BLACS_MPICH_32BIT_LIB mkl_blacs_intelmpi_lp64)
  __mkl_find_library(MKL_BLACS_MPICH_64BIT_LIB mkl_blacs_intelmpi_ilp64)
endif()
__mkl_find_library(MKL_BLACS_OMPI_32BIT_LIB mkl_blacs_openmpi_lp64)
__mkl_find_library(MKL_BLACS_OMPI_64BIT_LIB mkl_blacs_openmpi_ilp64)

# ScaLAPACK
#
__mkl_find_library(MKL_SCALAPACK_32BIT_LIB mkl_scalapack_lp64)
__mkl_find_library(MKL_SCALAPACK_64BIT_LIB mkl_scalapack_ilp64)

# Check if core libs were found
#
find_package_handle_standard_args(MKL REQUIRED_VARS COSMA_MKL_INCLUDE_DIRS
                                                    Threads_FOUND)

# Sequential has no threading dependency. There is currently no TBB module
# shipped with CMake. The dependency is not accounted for. (FIXME)
#
set(_mkl_dep_found_SEQ TRUE)
set(_mkl_dep_found_TBB TRUE)
if(TARGET OpenMP::OpenMP_CXX)
  set(_mkl_dep_OMP ${OpenMP_CXX_LIBRARIES})
  set(_mkl_dep_found_OMP TRUE)
endif()
# Define all blas, blacs and scalapack
#
foreach(_libtype "ST" "DYN")
set(_mkl_core_lib ${MKL_CORE_LIB_${_libtype}})
foreach(_bits "32BIT" "64BIT")
set(_mkl_scalapack_lib ${MKL_SCALAPACK_${_bits}_LIB_${_libtype}})
foreach(_iface "INTEL" "GF")
set(_mkl_interface_lib
${MKL_INTERFACE_${_iface}_${_bits}_LIB_${_libtype}})
foreach(_threading "SEQ" "OMP" "TBB")
set(_mkl_threading_lib ${MKL_${_threading}_LIB_${_libtype}})
string(TOLOWER "${_iface}_${_bits}_${_threading}_${_libtype}"
_tgt_config)
set(_mkl_tgt cosma::BLAS::MKL::${_tgt_config})
if(MKL_FOUND
AND _mkl_interface_lib
AND _mkl_threading_lib
AND _mkl_core_lib
AND _mkl_dep_found_${_threading}
AND NOT TARGET ${_mkl_tgt})
set(_mkl_libs
"${_mkl_linker_pre_flags_${_threading}}"
"${_mkl_interface_lib}"
"${_mkl_threading_lib}"
"${_mkl_core_lib}"
"${_mkl_linker_post_flags_${_threading}}"
"${_mkl_dep_${_threading}}"
"Threads::Threads")
add_library(${_mkl_tgt} INTERFACE IMPORTED)
set_target_properties(
${_mkl_tgt}
PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${COSMA_MKL_INCLUDE_DIRS}"
INTERFACE_LINK_LIBRARIES "${_mkl_libs}")
endif()
foreach(_mpi_impl "MPICH" "OMPI")
set(_mkl_blacs_lib ${MKL_BLACS_${_mpi_impl}_${_bits}_LIB_${_libtype}})
string(
TOLOWER "${_mpi_impl}_${_iface}_${_bits}_${_threading}_${_libtype}"
_tgt_config)
set(_scalapack_tgt cosma::BLAS::MKL::scalapack_${_tgt_config})
if(_mkl_blacs_lib
AND TARGET ${_mkl_tgt}
AND TARGET MPI::MPI_CXX
AND NOT TARGET cosma::BLAS::MKL::blacs_${_tgt_config})
set(_blacs_libs
"${_mkl_linker_pre_flags_${_libtype}}"
"${_mkl_interface_lib}"
"${_mkl_threading_lib}"
"${_mkl_core_lib}"
"${_mkl_blacs_lib}"
"${_mkl_linker_post_flags_${_libtype}}"
"MPI::MPI_CXX"
"${_mkl_dep_${_threading}}"
"Threads::Threads")
add_library(cosma::BLAS::MKL::blacs_${_tgt_config} INTERFACE IMPORTED)
set_target_properties(
cosma::BLAS::MKL::blacs_${_tgt_config}
PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
"${COSMA_MKL_INCLUDE_DIRS}" INTERFACE_LINK_LIBRARIES
"${_mkl_blacs_lib}")
endif()
if(_mkl_scalapack_lib AND NOT TARGET
cosma::BLAS::MKL::scalapack_${_tgt_config})
set(_scalapack_libs "${_mkl_scalapack_lib}" "${_blacs_tgt}")
add_library(cosma::BLAS::MKL::scalapack_${_tgt_config} INTERFACE IMPORTED)
set_target_properties(
cosma::BLAS::MKL::scalapack_${_tgt_config}
PROPERTIES INTERFACE_LINK_LIBRARIES "${_scalapack_libs}")
endif()
endforeach()
endforeach()
endforeach()
endforeach()
endforeach()
if(MKL_FOUND)
  # The loops above define a target for every feasible MKL configuration;
  # here we select the single configuration matching the current compiler,
  # threading option and integer width, and expose it under canonical names.
  #
  # Interface layer: GNU Fortran needs the gf bindings (not on macOS),
  # everything else uses the intel bindings.
  if(CMAKE_Fortran_COMPILER_LOADED
     AND CMAKE_Fortran_COMPILER_ID STREQUAL "GNU"
     AND NOT APPLE)
    set(COSMA_BLAS_mkl_INTFACE "gf")
  else()
    set(COSMA_BLAS_mkl_INTFACE "intel")
  endif()
  # Map COSMA_BLAS_THREADING onto MKL's threading-layer names.
  if(COSMA_BLAS_THREADING MATCHES "thread|gnu-thread|openmp")
    set(COSMA_BLAS_mkl_thread__ "omp")
  endif()
  if(COSMA_BLAS_THREADING MATCHES "sequential")
    set(COSMA_BLAS_mkl_thread__ "seq")
  endif()
  if(COSMA_BLAS_THREADING MATCHES "intel-thread")
    set(COSMA_BLAS_mkl_thread__ "intel")
  endif()
  if(COSMA_BLAS_THREADING MATCHES "tbb")
    set(COSMA_BLAS_mkl_thread__ "tbb")
  endif()
  # Integer width: lp64 ("32bit") vs ilp64 ("64bit").
  if(COSMA_BLAS_INTERFACE MATCHES "64bits")
    set(COSMA_BLAS_mkl_ILP_MODE "64bit")
  else()
    set(COSMA_BLAS_mkl_ILP_MODE "32bit")
  endif()
  # Pull includes and libraries from the selected dynamic-linkage target.
  get_target_property(
    MKL_BLAS_INCLUDE_DIRS
    cosma::BLAS::MKL::${COSMA_BLAS_mkl_INTFACE}_${COSMA_BLAS_mkl_ILP_MODE}_${COSMA_BLAS_mkl_thread__}_dyn
    INTERFACE_INCLUDE_DIRECTORIES)
  get_target_property(
    MKL_BLAS_LIBRARIES
    cosma::BLAS::MKL::${COSMA_BLAS_mkl_INTFACE}_${COSMA_BLAS_mkl_ILP_MODE}_${COSMA_BLAS_mkl_thread__}_dyn
    INTERFACE_LINK_LIBRARIES)
  # Canonical targets the rest of the build links against.
  if(NOT TARGET cosma::BLAS::MKL::blas)
    add_library(cosma::BLAS::MKL::MKL INTERFACE IMPORTED)
    add_library(cosma::BLAS::MKL::blas ALIAS cosma::BLAS::MKL::MKL)
    # create an empty lapack placeholder target (MKL bundles LAPACK)
    add_library(cosma::BLAS::MKL::lapack INTERFACE IMPORTED)
  endif()
  set_target_properties(
    cosma::BLAS::MKL::MKL
    PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${COSMA_MKL_INCLUDE_DIRS}"
               INTERFACE_LINK_LIBRARIES "${MKL_BLAS_LIBRARIES}")
  # BLACS flavour follows the MPI implementation; MPI_CXX_LIBRARY_VERSION_STRING
  # is only populated when MPI_DETERMINE_LIBRARY_VERSION was set.
  if("${MPI_CXX_LIBRARY_VERSION_STRING}" MATCHES "Open MPI")
    set(__mkl_mpi_ver_ "ompi")
  else()
    set(__mkl_mpi_ver_ "mpich")
  endif()
  get_target_property(
    __mkl_scalapack_inc
    cosma::BLAS::MKL::scalapack_${__mkl_mpi_ver_}_${COSMA_BLAS_mkl_INTFACE}_${COSMA_BLAS_mkl_ILP_MODE}_${COSMA_BLAS_mkl_thread__}_dyn
    INTERFACE_INCLUDE_DIRECTORIES)
  get_target_property(
    __mkl_scalapack_lib
    cosma::BLAS::MKL::scalapack_${__mkl_mpi_ver_}_${COSMA_BLAS_mkl_INTFACE}_${COSMA_BLAS_mkl_ILP_MODE}_${COSMA_BLAS_mkl_thread__}_dyn
    INTERFACE_LINK_LIBRARIES)
  get_target_property(
    __mkl_blacs_inc
    cosma::BLAS::MKL::blacs_${__mkl_mpi_ver_}_${COSMA_BLAS_mkl_INTFACE}_${COSMA_BLAS_mkl_ILP_MODE}_${COSMA_BLAS_mkl_thread__}_dyn
    INTERFACE_INCLUDE_DIRECTORIES)
  get_target_property(
    __mkl_blacs_lib
    cosma::BLAS::MKL::blacs_${__mkl_mpi_ver_}_${COSMA_BLAS_mkl_INTFACE}_${COSMA_BLAS_mkl_ILP_MODE}_${COSMA_BLAS_mkl_thread__}_dyn
    INTERFACE_LINK_LIBRARIES)
  # Aggregate target carrying the full ScaLAPACK + BLACS link line;
  # consumed by FindSCALAPACK.cmake.
  if(NOT TARGET cosma::BLAS::MKL::scalapack_link)
    add_library(cosma::BLAS::MKL::scalapack_link INTERFACE IMPORTED)
    set_target_properties(
      cosma::BLAS::MKL::scalapack_link
      PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${__mkl_scalapack_inc}"
                 INTERFACE_LINK_LIBRARIES
                 "${__mkl_scalapack_lib};${__mkl_blacs_lib}")
  endif()
  # Clean up helper variables. (BLAS_mkl_OMP / BLAS_mkl_OS_NAME are never set
  # in this module; the unsets appear to be harmless leftovers.)
  unset(COSMA_BLAS_mkl_ILP_MODE)
  unset(COSMA_BLAS_mkl_INTFACE)
  unset(COSMA_BLAS_mkl_thread__)
  unset(BLAS_mkl_OMP)
  unset(BLAS_mkl_OS_NAME)
  unset(__mkl_blacs_lib)
  unset(__mkl_blacs_inc)
  unset(__mkl_scalapack_lib)
  unset(__mkl_scalapack_inc)
  set(COSMA_BLAS_VENDOR "MKL")
  set(COSMA_MKL_SCALAPACK_VENDOR TRUE)
  mark_as_advanced(COSMA_BLAS_VENDOR)
  mark_as_advanced(COSMA_MKL_FOUND)
  mark_as_advanced(COSMA_MKL_SCALAPACK_VENDOR)
endif()
================================================
FILE: cmake/FindNCCL.cmake
================================================
# Locate the NVIDIA NCCL collective-communication library.
include(FindPackageHandleStandardArgs)

find_path(COSMA_NCCL_INCLUDE_DIRS
  NAMES nccl.h
  HINTS
  ${NCCL_ROOT}
  ENV NCCLROOT
)
find_library(COSMA_NCCL_LIBRARIES
  NAMES nccl nccl_static
  HINTS
  ${NCCL_ROOT}
  ENV NCCLROOT
)

find_package_handle_standard_args(NCCL DEFAULT_MSG COSMA_NCCL_INCLUDE_DIRS COSMA_NCCL_LIBRARIES)

if (NCCL_FOUND AND NOT TARGET cosma::nccl)
  add_library(cosma::nccl INTERFACE IMPORTED)
  # FIX: quote the property values — unquoted expansions would be split into
  # separate set_target_properties() arguments if a result ever contains a
  # list, producing a hard configure error.
  set_target_properties(cosma::nccl
    PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${COSMA_NCCL_INCLUDE_DIRS}"
    INTERFACE_LINK_LIBRARIES "${COSMA_NCCL_LIBRARIES}")
endif()
================================================
FILE: cmake/FindNVPL.cmake
================================================
# Wire up NVIDIA NVPL (BLAS/LAPACK/ScaLAPACK for ARM) through the imported
# targets provided by the vendor's nvpl_* CMake packages.
find_package("nvpl_blas" REQUIRED)
find_package("nvpl_lapack" REQUIRED)
find_package("nvpl_scalapack" REQUIRED)

# Integer-width suffix: lp64 (32-bit indices) vs ilp64 (64-bit indices).
if(COSMA_BLAS_INTERFACE STREQUAL "32bits")
  set(_nvpl_int "_lp64")
else()
  set(_nvpl_int "_ilp64")
endif()

# Threading-layer suffix.
if(COSMA_BLAS_THREADING STREQUAL "openmp")
  set(_nvpl_thread "_omp")
else()
  set(_nvpl_thread "_seq")
endif()

# BLACS flavour follows the MPI implementation. MPI_CXX_LIBRARY_VERSION_STRING
# is only populated when MPI_DETERMINE_LIBRARY_VERSION was set before
# find_package(MPI).
if("${MPI_CXX_LIBRARY_VERSION_STRING}" MATCHES "Open MPI")
  if(MPI_VERSION VERSION_GREATER_EQUAL "5.0")
    set(_nvpl_mpi "_openmpi5")
  elseif(MPI_VERSION VERSION_GREATER_EQUAL "4.0")
    set(_nvpl_mpi "_openmpi4")
  else()
    # FIX: this was written `else(MPI_VERSION VERSION_GREATER_EQUAL "3.0")`;
    # CMake ignores arguments to else(), so this is — and always was — the
    # unconditional catch-all for any older Open MPI.
    set(_nvpl_mpi "_openmpi3")
  endif()
else()
  set(_nvpl_mpi "_mpich")
endif()

if(NOT TARGET "cosma::BLAS::NVPL::nvpl")
  add_library("cosma::BLAS::NVPL::nvpl" INTERFACE IMPORTED)
  target_link_libraries("cosma::BLAS::NVPL::nvpl" INTERFACE
    "nvpl::blas${_nvpl_int}${_nvpl_thread}" "nvpl::lapack${_nvpl_int}${_nvpl_thread}"
    "nvpl::blacs${_nvpl_int}${_nvpl_mpi}" "nvpl::scalapack${_nvpl_int}")

  # Harvest libraries/includes from the vendor targets for variable-based use.
  get_target_property(COSMA_NVPL_LAPACK_LIBRARIES "nvpl::lapack${_nvpl_int}${_nvpl_thread}" INTERFACE_LINK_LIBRARIES)
  get_target_property(COSMA_NVPL_SCALAPACK_LIBRARIES "nvpl::scalapack${_nvpl_int}" INTERFACE_LINK_LIBRARIES)
  get_target_property(COSMA_NVPL_BLAS_INCLUDE_DIRS "nvpl::blas${_nvpl_int}${_nvpl_thread}" INTERFACE_INCLUDE_DIRECTORIES)
  get_target_property(COSMA_NVPL_LAPACK_INCLUDE_DIRS "nvpl::lapack${_nvpl_int}${_nvpl_thread}" INTERFACE_INCLUDE_DIRECTORIES)
  get_target_property(COSMA_NVPL_SCALAPACK_INCLUDE_DIRS "nvpl::scalapack${_nvpl_int}" INTERFACE_INCLUDE_DIRECTORIES)

  # NOTE(review): this overwrites the INTERFACE_LINK_LIBRARIES populated by
  # target_link_libraries() above (original behavior kept) — confirm intended.
  set_target_properties(
    cosma::BLAS::NVPL::nvpl
    PROPERTIES INTERFACE_LINK_LIBRARIES
               "${COSMA_NVPL_LAPACK_LIBRARIES}")
  set_target_properties(
    cosma::BLAS::NVPL::nvpl
    PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
               "${COSMA_NVPL_BLAS_INCLUDE_DIRS};${COSMA_NVPL_LAPACK_INCLUDE_DIRS}")
  add_library(cosma::BLAS::NVPL::blas ALIAS cosma::BLAS::NVPL::nvpl)

  # Aggregate ScaLAPACK link target consumed by FindSCALAPACK.cmake.
  add_library(cosma::BLAS::NVPL::scalapack_link INTERFACE IMPORTED)
  set_target_properties(
    cosma::BLAS::NVPL::scalapack_link
    PROPERTIES INTERFACE_LINK_LIBRARIES
               "${COSMA_NVPL_LAPACK_LIBRARIES};${COSMA_NVPL_SCALAPACK_LIBRARIES}")
  set_target_properties(
    cosma::BLAS::NVPL::scalapack_link
    PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
               "${COSMA_NVPL_BLAS_INCLUDE_DIRS};${COSMA_NVPL_LAPACK_INCLUDE_DIRS};${COSMA_NVPL_SCALAPACK_INCLUDE_DIRS}")
endif()
================================================
FILE: cmake/FindOPENBLAS.cmake
================================================
# find OPENBLAS
include(FindPackageHandleStandardArgs)

# Candidate installation prefixes from cache/environment ROOT variables.
set(_OPENBLAS_PATHS
    ${OPENBLAS_ROOT}
    $ENV{OPENBLAS_ROOT}
    $ENV{OPENBLASROOT}
    $ENV{OPENBLAS_DIR}
    $ENV{OPENBLASDIR})

find_path(COSMA_OPENBLAS_INCLUDE_DIRS
  NAMES "cblas-openblas.h" "cblas_openblas.h" "cblas.h"
  PATH_SUFFIXES "openblas" "openblas/include" "include" "include/openblas"
  HINTS ${_OPENBLAS_PATHS}
  DOC "openblas include directory")

find_library(COSMA_OPENBLAS_LINK_LIBRARIES
  NAMES openblas
  PATH_SUFFIXES "lib" "lib64" "openblas/lib" "openblas/lib64" "openblas"
  HINTS ${_OPENBLAS_PATHS}
  DOC "openblas libraries list")

find_package_handle_standard_args(OPENBLAS
  DEFAULT_MSG
  COSMA_OPENBLAS_LINK_LIBRARIES COSMA_OPENBLAS_INCLUDE_DIRS)

if(NOT TARGET cosma::BLAS::OPENBLAS::openblas)
  add_library(cosma::BLAS::OPENBLAS::openblas INTERFACE IMPORTED)
  add_library(cosma::BLAS::OPENBLAS::blas ALIAS cosma::BLAS::OPENBLAS::openblas)
endif()
set_property(TARGET cosma::BLAS::OPENBLAS::openblas
  PROPERTY INTERFACE_LINK_LIBRARIES ${COSMA_OPENBLAS_LINK_LIBRARIES})
set_property(TARGET cosma::BLAS::OPENBLAS::openblas
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${COSMA_OPENBLAS_INCLUDE_DIRS})

# prevent clutter in cache
# FIX: hide the variables this module actually defines; the previously listed
# OPENBLAS_LIBRARIES / OPENBLAS_INCLUDE_DIRS do not exist here.
mark_as_advanced(OPENBLAS_FOUND COSMA_OPENBLAS_LINK_LIBRARIES
                 COSMA_OPENBLAS_INCLUDE_DIRS)
================================================
FILE: cmake/FindSCALAPACK.cmake
================================================
# Resolve the ScaLAPACK link line for the selected COSMA_SCALAPACK backend
# (MKL / CRAY_LIBSCI / NVPL / CUSTOM) and expose it as
# cosma::scalapack::scalapack.
include(FindPackageHandleStandardArgs)

if(COSMA_SCALAPACK STREQUAL "MKL")
  find_package(MKL REQUIRED)
  get_target_property(COSMA_SCALAPACK_LINK_LIBRARIES cosma::BLAS::MKL::scalapack_link
                      INTERFACE_LINK_LIBRARIES)
elseif(COSMA_SCALAPACK STREQUAL "CRAY_LIBSCI")
  find_package(CRAY_LIBSCI REQUIRED)
  get_target_property(COSMA_SCALAPACK_LINK_LIBRARIES cosma::BLAS::CRAY_LIBSCI::scalapack_link
                      INTERFACE_LINK_LIBRARIES)
elseif(COSMA_SCALAPACK STREQUAL "NVPL")
  find_package(NVPL REQUIRED)
  get_target_property(COSMA_SCALAPACK_LINK_LIBRARIES cosma::BLAS::NVPL::scalapack_link
                      INTERFACE_LINK_LIBRARIES)
  # FIX: informational only — this was message(WARNING ...), a debug leftover
  # that turned every NVPL configure into a warning.
  message(STATUS "COSMA_SCALAPACK_LINK_LIBRARIES: ${COSMA_SCALAPACK_LINK_LIBRARIES}")
elseif(COSMA_SCALAPACK STREQUAL "CUSTOM")
  find_library(COSMA_SCALAPACK_LINK_LIBRARIES
    NAMES scalapack
    HINTS
    ${_COSMA_SCALAPACK_LIBRARY_DIRS}
    ENV SCALAPACKROOT
    ENV SCALAPACK_ROOT
    ENV ORNL_SCALAPACK_ROOT
    ENV SCALAPACK_PREFIX
    ENV SCALAPACK_DIR
    ENV SCALAPACKDIR
    /usr/bin
    PATH_SUFFIXES lib
    DOC "Path to the scalapack library.")
endif()

find_package_handle_standard_args(SCALAPACK REQUIRED_VARS COSMA_SCALAPACK_LINK_LIBRARIES)
# FIX: mirror the real result instead of unconditionally claiming success.
set(COSMA_SCALAPACK_FOUND ${SCALAPACK_FOUND})

if (NOT TARGET cosma::scalapack::scalapack)
  add_library(cosma::scalapack::scalapack INTERFACE IMPORTED)
  set_target_properties(
    cosma::scalapack::scalapack PROPERTIES INTERFACE_LINK_LIBRARIES
    "${COSMA_SCALAPACK_LINK_LIBRARIES}")
endif()

mark_as_advanced(COSMA_SCALAPACK_LINK_LIBRARIES COSMA_SCALAPACK_FOUND)
================================================
FILE: cmake/GitSubmodule.cmake
================================================
# Call to ensure that the git submodule in location `path` is loaded.
# If the submodule is not loaded, an error message that describes
# how to update the submodules is printed.
# Sets the variable name_avail to `ON` if the submodule is available,
# or `OFF` otherwise.
# copyright github.com/arbor-sim
# Check that the git submodule checked out at `path` is present (detected via
# its .git entry). Sets `<name>_avail` to ON/OFF in the caller's scope and
# prints checkout instructions when the submodule is missing.
# copyright github.com/arbor-sim
function(check_git_submodule name path)
  set(_avail_flag "${name}_avail")
  get_filename_component(_dotgit "${path}/.git" ABSOLUTE)
  if(EXISTS ${_dotgit})
    set(${_avail_flag} ON PARENT_SCOPE)
  else()
    message(
      "\nThe git submodule for ${name} is not available.\n"
      "To check out all submodules use the following commands:\n"
      " git submodule init\n"
      " git submodule update\n"
      "Or download submodules recursively when checking out:\n"
      " git clone --recursive https://github.com/eth-cscs/COSMA.git\n"
    )
    # the submodule is not checked out: report it as unavailable
    set(${_avail_flag} OFF PARENT_SCOPE)
  endif()
endfunction()
# Add the dependency `name` from its git submodule at `path` when the
# submodule is checked out; otherwise fall back to a system-wide
# installation located with find_package(<name> REQUIRED).
function(add_git_submodule_or_find_external name path)
  check_git_submodule(${name} ${path})
  if(NOT ${name}_avail)
    # attempt to find a system installation of ${name}
    find_package(${name} REQUIRED)
  else()
    message(VERBOSE "Using ${name} as git submodule from ${path}")
    add_subdirectory("${path}")
  endif()
endfunction()
================================================
FILE: cmake/adjust_mpiexec_flags.cmake
================================================
# Appends the --oversubscribe flag if OpenMPI.
#
function(adjust_mpiexec_flags)
  # Inspect the `mpirun --version` banner to tell Open MPI apart from
  # MPICH-like implementations; export the result as MPI_TYPE.
  execute_process(COMMAND mpirun --version OUTPUT_VARIABLE _mpirun_banner)
  string(FIND "${_mpirun_banner}" "Open MPI" _ompi_pos)
  if(_ompi_pos STREQUAL "-1")
    set(MPI_TYPE "mpich" PARENT_SCOPE)
  else()
    # Open MPI: prepend --oversubscribe to the mpiexec pre-flags.
    set(MPIEXEC_PREFLAGS "--oversubscribe;${MPIEXEC_PREFLAGS}" CACHE STRING "These flags will be directly before the executable that is being run by mpiexec." FORCE)
    set(MPI_TYPE "ompi" PARENT_SCOPE)
  endif()
endfunction()
================================================
FILE: cmake/build_type.cmake
================================================
# Set default to Release if none was specified and update the docs.
#
# Start from whatever the user chose (may be empty on a fresh configure).
set(default_build_type ${CMAKE_BUILD_TYPE})
# Single-config generators with no explicit choice default to Release;
# CMAKE_CONFIGURATION_TYPES is only set by multi-config generators.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "Setting build type to 'Release' as none was specified.")
  set(default_build_type "Release")
endif()
# FORCE is required because CMAKE_BUILD_TYPE is already in the cache.
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Profile." FORCE)
# Define a custom build type
#
#set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "" FORCE)
#set( CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE}" CACHE STRING "" FORCE )
#set( CMAKE_EXE_LINKER_FLAGS_PROFILE "${CMAKE_EXE_LINKER_FLAGS_RELEASE}" CACHE STRING "" FORCE )
#set( CMAKE_SHARED_LINKER_FLAGS_PROFILE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE}" CACHE STRING "" FORCE )
#mark_as_advanced(CMAKE_CXX_FLAGS_PROFILE
# CMAKE_C_FLAGS_PROFILE
# CMAKE_EXE_LINKER_FLAGS_PROFILE
# CMAKE_SHARED_LINKER_FLAGS_PROFILE )
#
# use with $<$:semiprof>
================================================
FILE: cmake/cosma.pc.in
================================================
# pkg-config metadata for the installed COSMA library.
# The @VAR@ placeholders are substituted by CMake's configure_file()
# at install time.
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: cosma
Description: Distributed communication-optimal matrix-matrix multiplication algorithm
Version: @cosma_VERSION@
Libs: -L${libdir} -lcosma
Cflags: -I${includedir}
================================================
FILE: cmake/cosmaConfig.cmake.in
================================================
# Package config file for COSMA: re-resolves every dependency recorded at
# build time (values baked in by configure_file()), then loads the
# exported cosmaTargets.cmake.
if(NOT TARGET cosma::cosma)
  cmake_policy(PUSH) # Save project's policies
  if(POLICY CMP0074)
    cmake_policy(SET CMP0074 NEW)
  endif()
  include(CMakeFindDependencyMacro)
  # Bundled modules should be found first to prevent conflicts with similarly
  # named modules in calling projects.
  #
  set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR} ${CMAKE_MODULE_PATH})
  set(MKL_ROOT "@MKL_ROOT@")
  set(MPI_DETERMINE_LIBRARY_VERSION TRUE)
  find_package(MPI COMPONENTS "CXX" REQUIRED)
  # Build-time configuration values.
  set(COSMA_BLAS "@COSMA_BLAS@")
  set(COSMA_SCALAPACK "@COSMA_SCALAPACK@")
  set(COSMA_BLAS_VENDOR "@COSMA_BLAS_VENDOR@")
  set(COSMA_BLAS_THREADING "@COSMA_BLAS_THREADING@")
  if ("@COSMA_GPU_BACKEND@" STREQUAL "CUDA" OR "@COSMA_GPU_BACKEND@" STREQUAL "ROCM")
    if ("${COSMA_BLAS}" STREQUAL "CUDA")
      find_dependency(CUDAToolkit)
    else()
      find_dependency(hip)
    endif()
    # BUGFIX: set() expects `CACHE <type> <docstring> [FORCE]`; the
    # previous `CACHE STRING FORCE ""` passed FORCE as the docstring.
    set(TILEMM_GPU_BACKEND "@COSMA_GPU_BACKEND@" CACHE STRING "" FORCE)
    find_dependency(Tiled-MM) # bundled
    if ("@COSMA_WITH_NCCL@")
      find_dependency(NCCL)
    endif()
    if("@COSMA_WITH_RCCL@")
      find_dependency(rccl)
    endif()
  endif ()
  # Quoted so the conditions stay well-formed when the value is empty.
  if (NOT "@COSMA_BLAS_VENDOR@" MATCHES "OFF")
    find_dependency(Blas)
  endif()
  if (NOT "${COSMA_SCALAPACK}" MATCHES "OFF")
    find_dependency(SCALAPACK)
  endif ()
  if ("@COSMA_WITH_PROFILING@")
    find_dependency(semiprof)
  endif ()
  # Clean-up module path.
  #
  list(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
  # These are bundled with cosma
  #
  find_dependency(costa)
  include("${CMAKE_CURRENT_LIST_DIR}/cosmaTargets.cmake")
  cmake_policy(POP) # Restore project's policies
endif()
================================================
FILE: cmake/find_cuda_version.cmake
================================================
# Parses the output of `nvcc --version` and exports
# CUDA_TOOLKIT_MAJOR_VERSION and CUDA_TOOLKIT_MINOR_VERSION to the
# calling scope.
function(find_cuda_version)
  execute_process(COMMAND nvcc --version OUTPUT_VARIABLE CUDA_VERSION_STRING)
  string(REGEX MATCH "release ([0-9]*)\\.([0-9]*)" _ "${CUDA_VERSION_STRING}")
  set(CUDA_TOOLKIT_MAJOR_VERSION ${CMAKE_MATCH_1})
  set(CUDA_TOOLKIT_MINOR_VERSION ${CMAKE_MATCH_2})
  message(STATUS "CUDA_TOOLKIT_MAJOR_VERSION = ${CUDA_TOOLKIT_MAJOR_VERSION}")
  message(STATUS "CUDA_TOOLKIT_MINOR_VERSION = ${CUDA_TOOLKIT_MINOR_VERSION}")
  # BUGFIX: set() inside a function is function-scoped; without
  # PARENT_SCOPE the results were silently lost at function exit,
  # contradicting the header comment.
  set(CUDA_TOOLKIT_MAJOR_VERSION ${CUDA_TOOLKIT_MAJOR_VERSION} PARENT_SCOPE)
  set(CUDA_TOOLKIT_MINOR_VERSION ${CUDA_TOOLKIT_MINOR_VERSION} PARENT_SCOPE)
endfunction()
================================================
FILE: docker/asan/build-env.Dockerfile
================================================
# Build environment for the AddressSanitizer CI image: Ubuntu 20.04 with
# MPICH, a debug OpenBLAS and a debug netlib ScaLAPACK, plus libtree for
# bundling the binaries in the deploy stage.
FROM ubuntu:20.04
WORKDIR /root
SHELL ["/bin/bash", "-c"]
ARG MPICH_VERSION=4.0.1
ENV DEBIAN_FRONTEND noninteractive
ENV FORCE_UNSAFE_CONFIGURE 1
ENV MPICH_VERSION ${MPICH_VERSION}
# Install basic tools
RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \
software-properties-common \
build-essential gfortran pkg-config \
git tar wget curl chrpath && \
rm -rf /var/lib/apt/lists/*
# Install cmake
RUN wget -qO- "https://cmake.org/files/v3.22/cmake-3.22.1-linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
# Install MPICH ABI compatible with Cray's lib on Piz Daint
RUN wget -q https://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \
tar -xzf mpich-${MPICH_VERSION}.tar.gz && \
cd mpich-${MPICH_VERSION} && \
./configure && \
make install -j$(nproc) && \
rm -rf /root/mpich-${MPICH_VERSION}.tar.gz /root/mpich-${MPICH_VERSION}
# Install OpenBLAS (DEBUG=1 build for the sanitizer image).
# NOTE(review): wget is given both "-qO -" and "-O openblas.tar.gz"; the
# later -O wins, so the "-qO -" is redundant — confirm and simplify.
ARG OPENBLAS_VERSION=0.3.20
RUN wget -qO - https://github.com/xianyi/OpenBLAS/archive/v${OPENBLAS_VERSION}.tar.gz -O openblas.tar.gz && \
tar -xzf openblas.tar.gz && \
cd OpenBLAS-${OPENBLAS_VERSION}/ && \
make TARGET=HASWELL NO_STATIC=1 DEBUG=1 -j$(nproc) && \
make install TARGET=HASWELL NO_STATIC=1 PREFIX=/usr/local/ && \
rm -rf /root/openblas.tar.gz /root/OpenBLAS-${OPENBLAS_VERSION}/ && \
ldconfig
# Install netlib ScaLAPACK (Debug build) on top of MPICH + OpenBLAS.
ARG NETLIB_SCALAPACK_VERSION=2.2.0
RUN wget -qO - http://www.netlib.org/scalapack/scalapack-${NETLIB_SCALAPACK_VERSION}.tgz -O scalapack.tar.gz && \
tar -xzf scalapack.tar.gz && \
cd scalapack-${NETLIB_SCALAPACK_VERSION} && \
mkdir build && \
cd build && \
CC=mpicc FC=mpif90 cmake .. \
-DBUILD_STATIC_LIBS=OFF \
-DBUILD_SHARED_LIBS=ON \
-DCMAKE_BUILD_TYPE=Debug && \
make -j$(nproc) && \
make install && \
rm -rf /root/scalapack.tar.gz /root/scalapack-${NETLIB_SCALAPACK_VERSION} && \
ldconfig
# Add deployment tooling
RUN mkdir -p /opt/libtree && \
curl -Lfso /opt/libtree/libtree https://github.com/haampie/libtree/releases/download/v2.0.0/libtree_x86_64 && \
chmod +x /opt/libtree/libtree
================================================
FILE: docker/asan/deploy.Dockerfile
================================================
# Two-stage build: compile COSMA with ASan/UBSan instrumentation in the
# build environment, then copy the self-contained test bundle into a
# plain Ubuntu runtime image.
ARG BUILD_ENV
FROM $BUILD_ENV as builder
ARG BLAS
# Build COSMA
COPY . /COSMA
# reduce the minimum local dimension to allow all mpi ranks to take part
# in testing
ENV COSMA_MIN_LOCAL_DIMENSION=32
RUN mkdir /COSMA/build && cd /COSMA/build && \
CC=mpicc CXX=mpicxx cmake .. \
-DCOSMA_WITH_TESTS=ON \
-DCOSMA_BLAS=OPENBLAS \
-DCOSMA_SCALAPACK=CUSTOM \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_CXX_FLAGS_DEBUG="-g -Og -fno-omit-frame-pointer -fsanitize=address,undefined" \
-DCMAKE_INSTALL_PREFIX=/root/COSMA-build && \
make -j$(nproc) && \
make install && \
rm -rf /COSMA
# Bundle the test executables together with their shared-library
# dependencies into /root/COSMA.bundle.
RUN /opt/libtree/libtree \
--chrpath \
-d /root/COSMA.bundle/ \
/root/COSMA-build/bin/test.cosma \
/root/COSMA-build/bin/test.mapper \
/root/COSMA-build/bin/test.multiply \
/root/COSMA-build/bin/test.multiply_using_layout \
/root/COSMA-build/bin/test.pdgemm \
/root/COSMA-build/bin/test.scalar_matmul
# Runtime stage: plain Ubuntu plus the bundle from the builder stage.
FROM ubuntu:20.04
# Automatically print stacktraces on segfault
ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
COPY --from=builder /root/COSMA.bundle /root/COSMA.bundle
# Make it easy to call our binaries.
ENV PATH="/root/COSMA.bundle/usr/bin:$PATH"
RUN echo "/root/COSMA.bundle/usr/lib/" > /etc/ld.so.conf.d/cosma.conf && ldconfig
WORKDIR /root/COSMA.bundle/usr/bin
# I'm not getting ASAN_OPTIONS=suppressions=file to work, so just disable leak detection for now.
ENV ASAN_OPTIONS=detect_leaks=false
================================================
FILE: docker/cpu-release/build-env.Dockerfile
================================================
# Build environment for the CPU release image: Ubuntu 20.04 with MPICH
# (C/C++ only) and Intel MKL, plus libtree for bundling in the deploy
# stage.
FROM ubuntu:20.04
WORKDIR /root
SHELL ["/bin/bash", "-c"]
ARG MKL_VERSION=2020.4-912
ARG MPICH_VERSION=4.0.1
ENV DEBIAN_FRONTEND noninteractive
ENV MKLROOT=/opt/intel/compilers_and_libraries/linux/mkl
ENV FORCE_UNSAFE_CONFIGURE 1
ENV MPICH_VERSION ${MPICH_VERSION}
ENV MKL_VERSION ${MKL_VERSION}
# reduce the minimum local dimension to allow all mpi ranks to take part
# in testing
ENV COSMA_MIN_LOCAL_DIMENSION=32
# Install basic tools
RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \
software-properties-common \
build-essential \
git tar wget curl gpg-agent chrpath && \
rm -rf /var/lib/apt/lists/*
# Install cmake
RUN wget -qO- "https://cmake.org/files/v3.22/cmake-3.22.1-linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
# Install MPICH ABI compatible with Cray's lib on Piz Daint
# (--disable-fortran: only the C/C++ bindings are needed here)
RUN wget -q https://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \
tar -xzf mpich-${MPICH_VERSION}.tar.gz && \
cd mpich-${MPICH_VERSION} && \
./configure --disable-fortran && \
make install -j$(nproc) && \
rm -rf /root/mpich-${MPICH_VERSION}.tar.gz /root/mpich-${MPICH_VERSION}
# Install MKL from Intel's apt repository and register its library
# directories with the dynamic linker.
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 2>/dev/null | apt-key add - && \
apt-add-repository 'deb https://apt.repos.intel.com/mkl all main' && \
apt-get install -y -qq --no-install-recommends intel-mkl-64bit-${MKL_VERSION} && \
rm -rf /var/lib/apt/lists/* && \
echo -e "/opt/intel/lib/intel64\n/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64" >> /etc/ld.so.conf.d/intel.conf && \
ldconfig
# Add deployment tooling
RUN mkdir -p /opt/libtree && \
curl -Lfso /opt/libtree/libtree https://github.com/haampie/libtree/releases/download/v2.0.0/libtree_x86_64 && \
chmod +x /opt/libtree/libtree
================================================
FILE: docker/cpu-release/deploy.Dockerfile
================================================
# Two-stage build: compile a Release COSMA against MKL in the build
# environment, bundle the test binaries with libtree, then copy the
# bundle into a plain Ubuntu runtime image.
ARG BUILD_ENV
FROM $BUILD_ENV as builder
# Build COSMA
# (compilervars.sh sets up the Intel/MKL environment for the build)
COPY . /COSMA
RUN source /opt/intel/bin/compilervars.sh intel64 && \
mkdir /COSMA/build && cd /COSMA/build && \
CC=mpicc CXX=mpicxx cmake .. \
-DCOSMA_WITH_TESTS=ON \
-DCOSMA_BLAS=MKL \
-DCOSMA_SCALAPACK=MKL \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/root/COSMA-build && \
make -j$(nproc) && \
make install && \
rm -rf /COSMA
ENV MKL_LIB=/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64
# Run linuxdeploy, and add a bunch of libs that are dlopen'ed by mkl
RUN /opt/libtree/libtree --chrpath --strip -d /root/COSMA.bundle/ \
/root/COSMA-build/bin/test.cosma \
/root/COSMA-build/bin/test.mapper \
/root/COSMA-build/bin/test.multiply \
/root/COSMA-build/bin/test.multiply_using_layout \
/root/COSMA-build/bin/test.pdgemm \
/root/COSMA-build/bin/test.scalar_matmul \
# MKL dlopen's some of their libs, so we have to explicitly copy them over
${MKL_LIB}/libmkl_avx.so \
${MKL_LIB}/libmkl_avx2.so \
${MKL_LIB}/libmkl_avx512_mic.so \
${MKL_LIB}/libmkl_avx512.so \
${MKL_LIB}/libmkl_core.so \
${MKL_LIB}/libmkl_def.so \
${MKL_LIB}/libmkl_intel_thread.so \
${MKL_LIB}/libmkl_mc.so \
${MKL_LIB}/libmkl_mc3.so \
${MKL_LIB}/libmkl_sequential.so \
${MKL_LIB}/libmkl_tbb_thread.so \
${MKL_LIB}/libmkl_vml_avx.so \
${MKL_LIB}/libmkl_vml_avx2.so \
${MKL_LIB}/libmkl_vml_avx512_mic.so \
${MKL_LIB}/libmkl_vml_avx512.so \
${MKL_LIB}/libmkl_vml_cmpt.so \
${MKL_LIB}/libmkl_vml_def.so \
${MKL_LIB}/libmkl_vml_mc.so \
${MKL_LIB}/libmkl_vml_mc3.so
# Runtime stage: plain Ubuntu plus the bundle from the builder stage.
FROM ubuntu:20.04
COPY --from=builder /root/COSMA.bundle /root/COSMA.bundle
# Automatically print stacktraces on segfault
ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
# Make it easy to call our binaries.
ENV PATH="/root/COSMA.bundle/usr/bin:$PATH"
RUN echo "/root/COSMA.bundle/usr/lib/" > /etc/ld.so.conf.d/cosma.conf && ldconfig
WORKDIR /root/COSMA.bundle/usr/bin
================================================
FILE: docker/gpu/build-env.Dockerfile
================================================
# Build environment for the GPU image: CUDA 11.6 devel base with MPICH,
# OpenBLAS and netlib ScaLAPACK, plus libtree for bundling in the
# deploy stage.
FROM nvidia/cuda:11.6.2-devel-ubuntu20.04
WORKDIR /root
SHELL ["/bin/bash", "-c"]
ARG MPICH_VERSION=4.0.1
ARG OPENBLAS_VERSION=0.3.20
ARG NETLIB_SCALAPACK_VERSION=2.2.0
ENV DEBIAN_FRONTEND noninteractive
# NOTE(review): MKLROOT and MKL_VERSION look like leftovers copied from
# the cpu-release Dockerfile — no ARG MKL_VERSION is declared and MKL is
# never installed in this image (OpenBLAS is used instead); confirm and
# remove.
ENV MKLROOT=/opt/intel/compilers_and_libraries/linux/mkl
ENV FORCE_UNSAFE_CONFIGURE 1
ENV MPICH_VERSION ${MPICH_VERSION}
ENV MKL_VERSION ${MKL_VERSION}
# reduce the minimum local dimension to allow all mpi ranks to take part
# in testing
ENV COSMA_MIN_LOCAL_DIMENSION=32
# Install basic tools
RUN apt-get update -qq && \
apt-get install -qq -y --no-install-recommends \
software-properties-common \
build-essential gfortran pkg-config \
git tar wget curl chrpath && \
rm -rf /var/lib/apt/lists/*
# Install cmake
RUN wget -qO- "https://cmake.org/files/v3.22/cmake-3.22.1-linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
# Install MPICH ABI compatible with Cray's lib on Piz Daint
RUN wget -q https://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz && \
tar -xzf mpich-${MPICH_VERSION}.tar.gz && \
cd mpich-${MPICH_VERSION} && \
./configure && \
make install -j$(nproc) && \
rm -rf /root/mpich-${MPICH_VERSION}.tar.gz /root/mpich-${MPICH_VERSION}
# Install OpenBLAS
RUN wget -qO - https://github.com/xianyi/OpenBLAS/archive/v${OPENBLAS_VERSION}.tar.gz -O openblas.tar.gz && \
tar -xzf openblas.tar.gz && \
cd OpenBLAS-${OPENBLAS_VERSION}/ && \
make TARGET=HASWELL NO_STATIC=1 -j$(nproc) && \
make install TARGET=HASWELL NO_STATIC=1 PREFIX=/usr/local/ && \
rm -rf /root/openblas.tar.gz /root/OpenBLAS-${OPENBLAS_VERSION}/ && \
ldconfig
# Install netlib ScaLAPACK (Release build) on top of MPICH + OpenBLAS.
RUN wget -qO - http://www.netlib.org/scalapack/scalapack-${NETLIB_SCALAPACK_VERSION}.tgz -O scalapack.tar.gz && \
tar -xzf scalapack.tar.gz && \
cd scalapack-${NETLIB_SCALAPACK_VERSION} && \
mkdir build && \
cd build && \
CC=mpicc FC=mpif90 cmake .. \
-DBUILD_STATIC_LIBS=OFF \
-DBUILD_SHARED_LIBS=ON \
-DCMAKE_BUILD_TYPE=Release && \
make -j$(nproc) && \
make install && \
rm -rf /root/scalapack.tar.gz /root/scalapack-${NETLIB_SCALAPACK_VERSION} && \
ldconfig
# Add deployment tooling
RUN mkdir -p /opt/libtree && \
curl -Lfso /opt/libtree/libtree https://github.com/haampie/libtree/releases/download/v2.0.0/libtree_x86_64 && \
chmod +x /opt/libtree/libtree
================================================
FILE: docker/gpu/deploy.Dockerfile
================================================
# Two-stage build: compile a Release COSMA with the CUDA BLAS backend,
# bundle the test binaries with libtree, then copy the bundle into a
# plain Ubuntu runtime image (GPU access via the NVIDIA runtime env vars).
ARG BUILD_ENV
FROM $BUILD_ENV as builder
ARG BLAS
# Build COSMA
COPY . /COSMA
RUN mkdir /COSMA/build && cd /COSMA/build && \
CC=mpicc CXX=mpicxx cmake .. \
-DCOSMA_WITH_TESTS=ON \
-DCUDA_PATH=/usr/local/cuda \
-DCOSMA_BLAS=CUDA \
-DCOSMA_SCALAPACK=CUSTOM \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/root/COSMA-build && \
make -j$(nproc) && \
make install && \
rm -rf /COSMA
# Bundle the test binaries and their shared-library dependencies with
# libtree (no MKL in this image).
RUN /opt/libtree/libtree \
-d /root/COSMA.bundle/ \
--chrpath \
--strip \
/root/COSMA-build/bin/test.cosma \
/root/COSMA-build/bin/test.mapper \
/root/COSMA-build/bin/test.multiply \
/root/COSMA-build/bin/test.multiply_using_layout \
/root/COSMA-build/bin/test.pdgemm \
/root/COSMA-build/bin/test.scalar_matmul
FROM ubuntu:20.04
# This is the only thing necessary really from nvidia/cuda's ubuntu18.04 runtime image
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=10.2"
# Automatically print stacktraces on segfault
ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
COPY --from=builder /root/COSMA.bundle /root/COSMA.bundle
# Make it easy to call our binaries.
ENV PATH="/root/COSMA.bundle/usr/bin:$PATH"
RUN echo "/root/COSMA.bundle/usr/lib/" > /etc/ld.so.conf.d/cosma.conf && ldconfig
WORKDIR /root/COSMA.bundle/usr/bin
================================================
FILE: libs/gtest_mpi/CMakeLists.txt
================================================
# gtest_mpi: header-only extension of Google Test for MPI programs.
# Builds the bundled Google Test and exposes gtest_mpi as an INTERFACE
# target that carries the include path and the gtest link dependency.
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
project(gtest_mpi CXX)
# Bundled Google Test sources.
add_subdirectory(external/gtest)
# INTERFACE library: gtest_mpi itself has no compiled sources.
add_library(gtest_mpi INTERFACE)
target_include_directories(gtest_mpi INTERFACE ${gtest_mpi_SOURCE_DIR}/include)
target_link_libraries(gtest_mpi INTERFACE gtest)
target_compile_features(gtest_mpi INTERFACE cxx_std_11)
================================================
FILE: libs/gtest_mpi/LICENSE
================================================
This project contains source code from the Googletest framework
obtained from https://github.com/google/googletest with the following
terms:
Copyright 2005, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------
Modifications and additions are published under the following terms:
Copyright 2019, Simon Frasch
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------
================================================
FILE: libs/gtest_mpi/README.md
================================================
# GTest MPI
This project provides an extension to the Google Test framework, to allow testing of MPI enabled applications.
The implementation includes a custom MPI environment and listener, with which all test failure messages are collected on the root process and the output includes the rank index for each failure.
## Requirements
- Google Test version 1.8.1 (other versions may work as well, depending on changes to Listener or Environment interfaces)
- An MPI library
- At least C++ 11.
- Linux or macOS
## Limitations
- All ranks MUST execute all tests in the same order. Within a test, the executed assertions may differ. If a test should run only on a subset of ranks, the excluded ranks must enter the test, but may exit immediately.
- Logging features of Google Test are not supported
## Example
```
#include <mpi.h>
#include "gtest/gtest.h"
#include "gtest_mpi/gtest_mpi.hpp"
int main(int argc, char* argv[]) {
// Initialize MPI before any call to gtest_mpi
MPI_Init(&argc, &argv);
// Initialize google test
::testing::InitGoogleTest(&argc, argv);
// Add a test environment, which will initialize a test communicator
// (a duplicate of MPI_COMM_WORLD)
::testing::AddGlobalTestEnvironment(new gtest_mpi::MPITestEnvironment());
auto& test_listeners = ::testing::UnitTest::GetInstance()->listeners();
// Remove default listener and replace with the custom MPI listener
delete test_listeners.Release(test_listeners.default_result_printer());
test_listeners.Append(new gtest_mpi::PrettyMPIUnitTestResultPrinter());
// run tests
auto exit_code = RUN_ALL_TESTS();
// Finalize MPI before exiting
MPI_Finalize();
return exit_code;
}
```
# License
```
This project contains source code from the Googletest framework
obtained from https://github.com/google/googletest with the following
terms:
Copyright 2005, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------
Modifications and additions are published under the following terms:
Copyright 2019, Simon Frasch
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------
```
================================================
FILE: libs/gtest_mpi/external/gtest/CMakeLists.txt
================================================
# Bundled Google Test: built as a static library from the amalgamated
# gtest-all.cpp source file.
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
project(gtest CXX)
add_library(gtest STATIC src/gtest-all.cpp)
target_include_directories(gtest PUBLIC ${gtest_SOURCE_DIR}/include)
target_compile_features(gtest PUBLIC cxx_std_11)
================================================
FILE: libs/gtest_mpi/external/gtest/include/gtest/gtest.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the public API for Google Test. It should be
// included by any test program that uses Google Test.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
// program!
//
// Acknowledgment: Google Test borrowed the idea of automatic test
// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
// easyUnit framework.
#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_H_
#include
#include
#include
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares functions and macros used internally by
// Google Test. They are subject to change without notice.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan)
//
// Low-level types and utilities for porting Google Test to various
// platforms. All macros ending with _ and symbols defined in an
// internal namespace are subject to change without notice. Code
// outside Google Test MUST NOT USE THEM DIRECTLY. Macros that don't
// end with _ are part of Google Test's public API and can be used by
// code outside Google Test.
//
// This file is fundamental to Google Test. All other Google Test source
// files are expected to #include this. Therefore, it cannot #include
// any other Google Test header.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
// Environment-describing macros
// -----------------------------
//
// Google Test can be used in many different environments. Macros in
// this section tell Google Test what kind of environment it is being
// used in, such that Google Test can provide environment-specific
// features and implementations.
//
// Google Test tries to automatically detect the properties of its
// environment, so users usually don't need to worry about these
// macros. However, the automatic detection is not perfect.
// Sometimes it's necessary for a user to define some of the following
// macros in the build script to override Google Test's decisions.
//
// If the user doesn't define a macro in the list, Google Test will
// provide a default definition. After this header is #included, all
// macros in this list will be defined to either 1 or 0.
//
// Notes to maintainers:
// - Each macro here is a user-tweakable knob; do not grow the list
// lightly.
// - Use #if to key off these macros. Don't use #ifdef or "#if
// defined(...)", which will not work as these macros are ALWAYS
// defined.
//
// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2)
// is/isn't available.
// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions
// are enabled.
// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string
// is/isn't available (some systems define
// ::string, which is different to std::string).
// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring
// is/isn't available (some systems define
// ::wstring, which is different to std::wstring).
// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular
// expressions are/aren't available.
// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that <pthread.h>
// is/isn't available.
// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't
// enabled.
// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that
// std::wstring does/doesn't work (Google Test can
// be used where std::wstring is unavailable).
// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple
// is/isn't available.
// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the
// compiler supports Microsoft's "Structured
// Exception Handling".
// GTEST_HAS_STREAM_REDIRECTION
// - Define it to 1/0 to indicate whether the
// platform supports I/O stream redirection using
// dup() and dup2().
// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google
// Test's own tr1 tuple implementation should be
// used. Unused when the user sets
// GTEST_HAS_TR1_TUPLE to 0.
// GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test
// is building in C++11/C++98 mode.
// GTEST_LINKED_AS_SHARED_LIBRARY
// - Define to 1 when compiling tests that use
// Google Test as a shared library (known as
// DLL on Windows).
// GTEST_CREATE_SHARED_LIBRARY
// - Define to 1 when compiling Google Test itself
// as a shared library.
// Platform-indicating macros
// --------------------------
//
// Macros indicating the platform on which Google Test is being used
// (a macro is defined to 1 if compiled on the given platform;
// otherwise UNDEFINED -- it's never defined to 0.). Google Test
// defines these macros automatically. Code outside Google Test MUST
// NOT define them.
//
// GTEST_OS_AIX - IBM AIX
// GTEST_OS_CYGWIN - Cygwin
// GTEST_OS_FREEBSD - FreeBSD
// GTEST_OS_HPUX - HP-UX
// GTEST_OS_LINUX - Linux
// GTEST_OS_LINUX_ANDROID - Google Android
// GTEST_OS_MAC - Mac OS X
// GTEST_OS_IOS - iOS
// GTEST_OS_NACL - Google Native Client (NaCl)
// GTEST_OS_OPENBSD - OpenBSD
// GTEST_OS_QNX - QNX
// GTEST_OS_SOLARIS - Sun Solaris
// GTEST_OS_SYMBIAN - Symbian
// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile)
// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop
// GTEST_OS_WINDOWS_MINGW - MinGW
// GTEST_OS_WINDOWS_MOBILE - Windows Mobile
// GTEST_OS_WINDOWS_PHONE - Windows Phone
// GTEST_OS_WINDOWS_RT - Windows Store App/WinRT
// GTEST_OS_ZOS - z/OS
//
// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
// most stable support. Since core members of the Google Test project
// don't have access to other platforms, support for them may be less
// stable. If you notice any problems on your platform, please notify
// googletestframework@googlegroups.com (patches for fixing them are
// even more welcome!).
//
// It is possible that none of the GTEST_OS_* macros are defined.
// Feature-indicating macros
// -------------------------
//
// Macros indicating which Google Test features are available (a macro
// is defined to 1 if the corresponding feature is supported;
// otherwise UNDEFINED -- it's never defined to 0.). Google Test
// defines these macros automatically. Code outside Google Test MUST
// NOT define them.
//
// These macros are public so that portable tests can be written.
// Such tests typically surround code using a feature with an #if
// which controls that code. For example:
//
// #if GTEST_HAS_DEATH_TEST
// EXPECT_DEATH(DoSomethingDeadly());
// #endif
//
// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized
// tests)
// GTEST_HAS_DEATH_TEST - death tests
// GTEST_HAS_PARAM_TEST - value-parameterized tests
// GTEST_HAS_TYPED_TEST - typed tests
// GTEST_HAS_TYPED_TEST_P - type-parameterized tests
// GTEST_IS_THREADSAFE - Google Test is thread-safe.
// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with
// GTEST_HAS_POSIX_RE (see above) which users can
// define themselves.
// GTEST_USES_SIMPLE_RE - our own simple regex is used;
// the above two are mutually exclusive.
// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
// Misc public macros
// ------------------
//
// GTEST_FLAG(flag_name) - references the variable corresponding to
// the given Google Test flag.
// Internal utilities
// ------------------
//
// The following macros and utilities are for Google Test's INTERNAL
// use only. Code outside Google Test MUST NOT USE THEM DIRECTLY.
//
// Macros for basic C++ coding:
// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a
// variable don't have to be used.
// GTEST_DISALLOW_ASSIGN_ - disables operator=.
// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used.
// GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is
// suppressed (constant conditional).
// GTEST_INTENTIONAL_CONST_COND_POP_ - finish code section where MSVC C4127
// is suppressed.
//
// C++11 feature wrappers:
//
// testing::internal::move - portability wrapper for std::move.
//
// Synchronization:
// Mutex, MutexLock, ThreadLocal, GetThreadCount()
// - synchronization primitives.
//
// Template meta programming:
// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only.
// IteratorTraits - partial implementation of std::iterator_traits, which
// is not available in libCstd when compiled with Sun C++.
//
// Smart pointers:
// scoped_ptr - as in TR2.
//
// Regular expressions:
// RE - a simple regular expression class using the POSIX
// Extended Regular Expression syntax on UNIX-like
// platforms, or a reduced regular expression syntax on
// other platforms, including Windows.
//
// Logging:
// GTEST_LOG_() - logs messages at the specified severity level.
// LogToStderr() - directs all log messages to stderr.
// FlushInfoLog() - flushes informational log messages.
//
// Stdout and stderr capturing:
// CaptureStdout() - starts capturing stdout.
// GetCapturedStdout() - stops capturing stdout and returns the captured
// string.
// CaptureStderr() - starts capturing stderr.
// GetCapturedStderr() - stops capturing stderr and returns the captured
// string.
//
// Integer types:
// TypeWithSize - maps an integer to an int type.
// Int32, UInt32, Int64, UInt64, TimeInMillis
// - integers of known sizes.
// BiggestInt - the biggest signed integer type.
//
// Command-line utilities:
// GTEST_DECLARE_*() - declares a flag.
// GTEST_DEFINE_*() - defines a flag.
// GetInjectableArgvs() - returns the command line as a vector of strings.
//
// Environment variable utilities:
// GetEnv() - gets the value of an environment variable.
// BoolFromGTestEnv() - parses a bool environment variable.
// Int32FromGTestEnv() - parses an Int32 environment variable.
// StringFromGTestEnv() - parses a string environment variable.
#include <ctype.h>  // for isspace, etc
#include <stddef.h>  // for ptrdiff_t
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifndef _WIN32_WCE
# include <sys/types.h>
# include <sys/stat.h>
#endif  // !_WIN32_WCE
#if defined __APPLE__
# include <AvailabilityMacros.h>
# include <TargetConditionals.h>
#endif
#include <algorithm>  // NOLINT
#include <iostream>  // NOLINT
#include <sstream>  // NOLINT
#include <string>  // NOLINT
#include <utility>
#include <vector>  // NOLINT
// Copyright 2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the GTEST_OS_* macro.
// It is separate from gtest-port.h so that custom/gtest-port.h can include it.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
// Determines the platform on which Google Test is compiled.
#ifdef __CYGWIN__
# define GTEST_OS_CYGWIN 1
#elif defined __SYMBIAN32__
# define GTEST_OS_SYMBIAN 1
#elif defined _WIN32
# define GTEST_OS_WINDOWS 1
# ifdef _WIN32_WCE
# define GTEST_OS_WINDOWS_MOBILE 1
# elif defined(__MINGW__) || defined(__MINGW32__)
# define GTEST_OS_WINDOWS_MINGW 1
# elif defined(WINAPI_FAMILY)
# include <winapifamily.h>
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
# define GTEST_OS_WINDOWS_DESKTOP 1
# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
# define GTEST_OS_WINDOWS_PHONE 1
# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
# define GTEST_OS_WINDOWS_RT 1
# else
// WINAPI_FAMILY defined but no known partition matched.
// Default to desktop.
# define GTEST_OS_WINDOWS_DESKTOP 1
# endif
# else
# define GTEST_OS_WINDOWS_DESKTOP 1
# endif // _WIN32_WCE
#elif defined __APPLE__
# define GTEST_OS_MAC 1
# if TARGET_OS_IPHONE
# define GTEST_OS_IOS 1
# endif
#elif defined __FreeBSD__
# define GTEST_OS_FREEBSD 1
#elif defined __linux__
# define GTEST_OS_LINUX 1
# if defined __ANDROID__
# define GTEST_OS_LINUX_ANDROID 1
# endif
#elif defined __MVS__
# define GTEST_OS_ZOS 1
#elif defined(__sun) && defined(__SVR4)
# define GTEST_OS_SOLARIS 1
#elif defined(_AIX)
# define GTEST_OS_AIX 1
#elif defined(__hpux)
# define GTEST_OS_HPUX 1
#elif defined __native_client__
# define GTEST_OS_NACL 1
#elif defined __OpenBSD__
# define GTEST_OS_OPENBSD 1
#elif defined __QNX__
# define GTEST_OS_QNX 1
#endif // __CYGWIN__
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
// Copyright 2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Injection point for custom user configurations.
// The following macros can be defined:
//
// Flag related macros:
// GTEST_FLAG(flag_name)
// GTEST_USE_OWN_FLAGFILE_FLAG_ - Define to 0 when the system provides its
// own flagfile flag parsing.
// GTEST_DECLARE_bool_(name)
// GTEST_DECLARE_int32_(name)
// GTEST_DECLARE_string_(name)
// GTEST_DEFINE_bool_(name, default_val, doc)
// GTEST_DEFINE_int32_(name, default_val, doc)
// GTEST_DEFINE_string_(name, default_val, doc)
//
// Test filtering:
// GTEST_TEST_FILTER_ENV_VAR_ - The name of an environment variable that
// will be used if --GTEST_FLAG(test_filter)
// is not provided.
//
// Logging:
// GTEST_LOG_(severity)
// GTEST_CHECK_(condition)
// Functions LogToStderr() and FlushInfoLog() have to be provided too.
//
// Threading:
// GTEST_HAS_NOTIFICATION_ - Enabled if Notification is already provided.
// GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ - Enabled if Mutex and ThreadLocal are
// already provided.
// Must also provide GTEST_DECLARE_STATIC_MUTEX_(mutex) and
// GTEST_DEFINE_STATIC_MUTEX_(mutex)
//
// GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
// GTEST_LOCK_EXCLUDED_(locks)
//
// ** Custom implementation starts here **
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
#if !defined(GTEST_DEV_EMAIL_)
# define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
# define GTEST_FLAG_PREFIX_ "gtest_"
# define GTEST_FLAG_PREFIX_DASH_ "gtest-"
# define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
# define GTEST_NAME_ "Google Test"
# define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/"
#endif // !defined(GTEST_DEV_EMAIL_)
#if !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
# define GTEST_INIT_GOOGLE_TEST_NAME_ "testing::InitGoogleTest"
#endif // !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
// Determines the version of gcc that is used to compile this.
#ifdef __GNUC__
// 40302 means version 4.3.2.
# define GTEST_GCC_VER_ \
(__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
#endif // __GNUC__
// Macros for disabling Microsoft Visual C++ warnings.
//
// GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385)
// /* code that triggers warnings C4800 and C4385 */
// GTEST_DISABLE_MSC_WARNINGS_POP_()
#if _MSC_VER >= 1500
# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \
__pragma(warning(push)) \
__pragma(warning(disable: warnings))
# define GTEST_DISABLE_MSC_WARNINGS_POP_() \
__pragma(warning(pop))
#else
// Older versions of MSVC don't have __pragma.
# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings)
# define GTEST_DISABLE_MSC_WARNINGS_POP_()
#endif
#ifndef GTEST_LANG_CXX11
// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when
// -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a
// value for __cplusplus, and recent versions of clang, gcc, and
// probably other compilers set that too in C++11 mode.
# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L
// Compiling in at least C++11 mode.
# define GTEST_LANG_CXX11 1
# else
# define GTEST_LANG_CXX11 0
# endif
#endif
// Distinct from C++11 language support, some environments don't provide
// proper C++11 library support. Notably, it's possible to build in
// C++11 mode when targeting Mac OS X 10.6, which has an old libstdc++
// with no C++11 support.
//
// libstdc++ has sufficient C++11 support as of GCC 4.6.0, __GLIBCXX__
// 20110325, but maintenance releases in the 4.4 and 4.5 series followed
// this date, so check for those versions by their date stamps.
// https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning
#if GTEST_LANG_CXX11 && \
(!defined(__GLIBCXX__) || ( \
__GLIBCXX__ >= 20110325ul && /* GCC >= 4.6.0 */ \
/* Blacklist of patch releases of older branches: */ \
__GLIBCXX__ != 20110416ul && /* GCC 4.4.6 */ \
__GLIBCXX__ != 20120313ul && /* GCC 4.4.7 */ \
__GLIBCXX__ != 20110428ul && /* GCC 4.5.3 */ \
__GLIBCXX__ != 20120702ul)) /* GCC 4.5.4 */
# define GTEST_STDLIB_CXX11 1
#endif
// Only use C++11 library features if the library provides them.
#if GTEST_STDLIB_CXX11
# define GTEST_HAS_STD_BEGIN_AND_END_ 1
# define GTEST_HAS_STD_FORWARD_LIST_ 1
# define GTEST_HAS_STD_FUNCTION_ 1
# define GTEST_HAS_STD_INITIALIZER_LIST_ 1
# define GTEST_HAS_STD_MOVE_ 1
# define GTEST_HAS_STD_SHARED_PTR_ 1
# define GTEST_HAS_STD_TYPE_TRAITS_ 1
# define GTEST_HAS_STD_UNIQUE_PTR_ 1
#endif
// C++11 specifies that <tuple> provides std::tuple.
// Some platforms still might not have it, however.
#if GTEST_LANG_CXX11
# define GTEST_HAS_STD_TUPLE_ 1
# if defined(__clang__)
// Inspired by http://clang.llvm.org/docs/LanguageExtensions.html#__has_include
# if defined(__has_include) && !__has_include(<tuple>)
# undef GTEST_HAS_STD_TUPLE_
# endif
# elif defined(_MSC_VER)
// Inspired by boost/config/stdlib/dinkumware.hpp
# if defined(_CPPLIB_VER) && _CPPLIB_VER < 520
# undef GTEST_HAS_STD_TUPLE_
# endif
# elif defined(__GLIBCXX__)
// Inspired by boost/config/stdlib/libstdcpp3.hpp,
// http://gcc.gnu.org/gcc-4.2/changes.html and
// http://gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt01ch01.html#manual.intro.status.standard.200x
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
# undef GTEST_HAS_STD_TUPLE_
# endif
# endif
#endif
// Brings in definitions for functions used in the testing::internal::posix
// namespace (read, write, close, chdir, isatty, stat). We do not currently
// use them on Windows Mobile.
#if GTEST_OS_WINDOWS
# if !GTEST_OS_WINDOWS_MOBILE
# include <direct.h>
# include <io.h>
# endif
// In order to avoid having to include <windows.h>, use forward declaration
// assuming CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION.
// This assumption is verified by
// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION.
struct _RTL_CRITICAL_SECTION;
#else
// This assumes that non-Windows OSes provide unistd.h. For OSes where this
// is not the case, we need to include headers that provide the functions
// mentioned above.
# include <unistd.h>
# include <strings.h>
#endif // GTEST_OS_WINDOWS
#if GTEST_OS_LINUX_ANDROID
// Used to define __ANDROID_API__ matching the target NDK API level.
# include <android/api-level.h> // NOLINT
#endif
// Defines this to true iff Google Test can use POSIX regular expressions.
#ifndef GTEST_HAS_POSIX_RE
# if GTEST_OS_LINUX_ANDROID
// On Android, <regex.h> is only available starting with Gingerbread.
# define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
# else
# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
# endif
#endif
#if GTEST_USES_PCRE
// The appropriate headers have already been included.
#elif GTEST_HAS_POSIX_RE
// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise. We can #include it here as we already
// included <stdlib.h>, which is guaranteed to define size_t through
// <stddef.h>.
# include <regex.h> // NOLINT
# define GTEST_USES_POSIX_RE 1
#elif GTEST_OS_WINDOWS
// <regex.h> is not available on Windows. Use our own simple regex
// implementation instead.
# define GTEST_USES_SIMPLE_RE 1
#else
// <regex.h> may not be available on this platform. Use our own
// simple regex implementation instead.
# define GTEST_USES_SIMPLE_RE 1
#endif // GTEST_USES_PCRE
#ifndef GTEST_HAS_EXCEPTIONS
// The user didn't tell us whether exceptions are enabled, so we need
// to figure it out.
# if defined(_MSC_VER) || defined(__BORLANDC__)
// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS
// macro to enable exceptions, so we'll do the same.
// Assumes that exceptions are enabled by default.
# ifndef _HAS_EXCEPTIONS
# define _HAS_EXCEPTIONS 1
# endif // _HAS_EXCEPTIONS
# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
# elif defined(__clang__)
// clang defines __EXCEPTIONS iff exceptions are enabled before clang 220714,
// but iff cleanups are enabled after that. In Obj-C++ files, there can be
// cleanups for ObjC exceptions which also need cleanups, even if C++ exceptions
// are disabled. clang has __has_feature(cxx_exceptions) which checks for C++
// exceptions starting at clang r206352, but which checked for cleanups prior to
// that. To reliably check for C++ exception availability with clang, check for
// __EXCEPTIONS && __has_feature(cxx_exceptions).
# define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions))
# elif defined(__GNUC__) && __EXCEPTIONS
// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled.
# define GTEST_HAS_EXCEPTIONS 1
# elif defined(__SUNPRO_CC)
// Sun Pro CC supports exceptions. However, there is no compile-time way of
// detecting whether they are enabled or not. Therefore, we assume that
// they are enabled unless the user tells us otherwise.
# define GTEST_HAS_EXCEPTIONS 1
# elif defined(__IBMCPP__) && __EXCEPTIONS
// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled.
# define GTEST_HAS_EXCEPTIONS 1
# elif defined(__HP_aCC)
// Exception handling is in effect by default in HP aCC compiler. It has to
// be turned of by +noeh compiler option if desired.
# define GTEST_HAS_EXCEPTIONS 1
# else
// For other compilers, we assume exceptions are disabled to be
// conservative.
# define GTEST_HAS_EXCEPTIONS 0
# endif // defined(_MSC_VER) || defined(__BORLANDC__)
#endif // GTEST_HAS_EXCEPTIONS
#if !defined(GTEST_HAS_STD_STRING)
// Even though we don't use this macro any longer, we keep it in case
// some clients still depend on it.
# define GTEST_HAS_STD_STRING 1
#elif !GTEST_HAS_STD_STRING
// The user told us that ::std::string isn't available.
# error "Google Test cannot be used where ::std::string isn't available."
#endif // !defined(GTEST_HAS_STD_STRING)
#ifndef GTEST_HAS_GLOBAL_STRING
// The user didn't tell us whether ::string is available, so we need
// to figure it out.
# define GTEST_HAS_GLOBAL_STRING 0
#endif // GTEST_HAS_GLOBAL_STRING
#ifndef GTEST_HAS_STD_WSTRING
// The user didn't tell us whether ::std::wstring is available, so we need
// to figure it out.
// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
// is available.
// Cygwin 1.7 and below doesn't support ::std::wstring.
// Solaris' libc++ doesn't support it either. Android has
// no support for it at least as recent as Froyo (2.2).
# define GTEST_HAS_STD_WSTRING \
(!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS))
#endif // GTEST_HAS_STD_WSTRING
#ifndef GTEST_HAS_GLOBAL_WSTRING
// The user didn't tell us whether ::wstring is available, so we need
// to figure it out.
# define GTEST_HAS_GLOBAL_WSTRING \
(GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING)
#endif // GTEST_HAS_GLOBAL_WSTRING
// Determines whether RTTI is available.
#ifndef GTEST_HAS_RTTI
// The user didn't tell us whether RTTI is enabled, so we need to
// figure it out.
# ifdef _MSC_VER
# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled.
# define GTEST_HAS_RTTI 1
# else
# define GTEST_HAS_RTTI 0
# endif
// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled.
# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302)
# ifdef __GXX_RTTI
// When building against STLport with the Android NDK and with
// -frtti -fno-exceptions, the build fails at link time with undefined
// references to __cxa_bad_typeid. Note sure if STL or toolchain bug,
// so disable RTTI when detected.
# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \
!defined(__EXCEPTIONS)
# define GTEST_HAS_RTTI 0
# else
# define GTEST_HAS_RTTI 1
# endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS
# else
# define GTEST_HAS_RTTI 0
# endif // __GXX_RTTI
// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends
// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the
// first version with C++ support.
# elif defined(__clang__)
# define GTEST_HAS_RTTI __has_feature(cxx_rtti)
// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if
// both the typeid and dynamic_cast features are present.
# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900)
# ifdef __RTTI_ALL__
# define GTEST_HAS_RTTI 1
# else
# define GTEST_HAS_RTTI 0
# endif
# else
// For all other compilers, we assume RTTI is enabled.
# define GTEST_HAS_RTTI 1
# endif // _MSC_VER
#endif // GTEST_HAS_RTTI
// It's this header's responsibility to #include <typeinfo> when RTTI
// is enabled.
#if GTEST_HAS_RTTI
# include <typeinfo>
#endif
// Determines whether Google Test can use the pthreads library.
#ifndef GTEST_HAS_PTHREAD
// The user didn't tell us explicitly, so we make reasonable assumptions about
// which platforms have pthreads support.
//
// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
// to your compiler flags.
# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \
|| GTEST_OS_QNX || GTEST_OS_FREEBSD || GTEST_OS_NACL)
#endif // GTEST_HAS_PTHREAD
#if GTEST_HAS_PTHREAD
// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is
// true.
# include <pthread.h> // NOLINT
// For timespec and nanosleep, used below.
# include <time.h> // NOLINT
#endif
// Determines if hash_map/hash_set are available.
// Only used for testing against those containers.
#if !defined(GTEST_HAS_HASH_MAP_)
# if _MSC_VER
# define GTEST_HAS_HASH_MAP_ 1 // Indicates that hash_map is available.
# define GTEST_HAS_HASH_SET_ 1 // Indicates that hash_set is available.
# endif // _MSC_VER
#endif // !defined(GTEST_HAS_HASH_MAP_)
// Determines whether Google Test can use tr1/tuple. You can define
// this macro to 0 to prevent Google Test from using tuple (any
// feature depending on tuple with be disabled in this mode).
#ifndef GTEST_HAS_TR1_TUPLE
# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR)
// STLport, provided with the Android NDK, has neither <tr1/tuple> or <tuple>.
# define GTEST_HAS_TR1_TUPLE 0
# else
// The user didn't tell us not to do it, so we assume it's OK.
# define GTEST_HAS_TR1_TUPLE 1
# endif
#endif // GTEST_HAS_TR1_TUPLE
// Determines whether Google Test's own tr1 tuple implementation
// should be used.
#ifndef GTEST_USE_OWN_TR1_TUPLE
// The user didn't tell us, so we need to figure it out.
// We use our own TR1 tuple if we aren't sure the user has an
// implementation of it already. At this time, libstdc++ 4.0.0+ and
// MSVC 2010 are the only mainstream standard libraries that come
// with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler
// pretends to be GCC by defining __GNUC__ and friends, but cannot
// compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1
// tuple in a 323 MB Feature Pack download, which we cannot assume the
// user has. QNX's QCC compiler is a modified GCC but it doesn't
// support TR1 tuple. libc++ only provides std::tuple, in C++11 mode,
// and it can be used with some compilers that define __GNUC__.
# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \
&& !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600
# define GTEST_ENV_HAS_TR1_TUPLE_ 1
# endif
// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used
// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6
// can build with clang but need to use gcc4.2's libstdc++).
# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325)
# define GTEST_ENV_HAS_STD_TUPLE_ 1
# endif
# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_
# define GTEST_USE_OWN_TR1_TUPLE 0
# else
# define GTEST_USE_OWN_TR1_TUPLE 1
# endif
#endif // GTEST_USE_OWN_TR1_TUPLE
// To avoid conditional compilation everywhere, we make it
// gtest-port.h's responsibility to #include the header implementing
// tuple.
#if GTEST_HAS_STD_TUPLE_
# include <tuple> // IWYU pragma: export
# define GTEST_TUPLE_NAMESPACE_ ::std
#endif // GTEST_HAS_STD_TUPLE_
// We include tr1::tuple even if std::tuple is available to define printers for
// them.
#if GTEST_HAS_TR1_TUPLE
# ifndef GTEST_TUPLE_NAMESPACE_
# define GTEST_TUPLE_NAMESPACE_ ::std::tr1
# endif // GTEST_TUPLE_NAMESPACE_
# if GTEST_USE_OWN_TR1_TUPLE
// This file was GENERATED by command:
// pump.py gtest-tuple.h.pump
// DO NOT EDIT BY HAND!!!
// Copyright 2009 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#include <utility> // For ::std::pair.
// The compiler used in Symbian has a bug that prevents us from declaring the
// tuple template as a friend (it complains that tuple is redefined). This
// hack bypasses the bug by declaring the members that should otherwise be
// private as public.
// Sun Studio versions < 12 also have the above bug.
#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
#else
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
    template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
   private:
#endif
// Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that conflict
// with our own definitions. Therefore using our own tuple does not work on
// those compilers.
#if defined(_MSC_VER) && _MSC_VER >= 1600 /* 1600 is Visual Studio 2010 */
# error "gtest's tuple doesn't compile on Visual Studio 2010 or later. \
GTEST_USE_OWN_TR1_TUPLE must be set to 0 on those compilers."
#endif
// GTEST_n_TUPLE_(T) is the type of an n-tuple.
#define GTEST_0_TUPLE_(T)  tuple<>
#define GTEST_1_TUPLE_(T)  tuple<T##0, void, void, void, void, void, void, \
    void, void, void>
#define GTEST_2_TUPLE_(T)  tuple<T##0, T##1, void, void, void, void, void, \
    void, void, void>
#define GTEST_3_TUPLE_(T)  tuple<T##0, T##1, T##2, void, void, void, void, \
    void, void, void>
#define GTEST_4_TUPLE_(T)  tuple<T##0, T##1, T##2, T##3, void, void, void, \
    void, void, void>
#define GTEST_5_TUPLE_(T)  tuple<T##0, T##1, T##2, T##3, T##4, void, void, \
    void, void, void>
#define GTEST_6_TUPLE_(T)  tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \
    void, void, void>
#define GTEST_7_TUPLE_(T)  tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    void, void, void>
#define GTEST_8_TUPLE_(T)  tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, void, void>
#define GTEST_9_TUPLE_(T)  tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, T##8, void>
#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, T##8, T##9>
// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
#define GTEST_0_TYPENAMES_(T)
#define GTEST_1_TYPENAMES_(T) typename T##0
#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3
#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3, typename T##4
#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3, typename T##4, typename T##5
#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3, typename T##4, typename T##5, typename T##6
#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3, typename T##4, typename T##5, typename T##6, \
typename T##7, typename T##8
#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
typename T##3, typename T##4, typename T##5, typename T##6, \
typename T##7, typename T##8, typename T##9
// In theory, defining stuff in the ::std namespace is undefined
// behavior. We can do this as we are playing the role of a standard
// library vendor.
namespace std {
namespace tr1 {
template <GTEST_10_TYPENAMES_(T)>
class tuple;
// Anything in namespace gtest_internal is Google Test's INTERNAL
// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
namespace gtest_internal {
// ByRef